From 375156937a0783432c5d18e199b5d8d2b3ec33f7 Mon Sep 17 00:00:00 2001 From: ramelg01 Date: Sat, 26 Feb 2022 22:06:20 +0000 Subject: Implementation of ClPooling3d - For NDHWC layout - For F16 and F32 data types - Mixed Precision stil not supported Resolves: COMPMID-4670 Signed-off-by: ramy.elgammal@arm.com Change-Id: I0e14a13e4625569e8e5ee67e6033bd1efe0da469 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7262 Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Reviewed-by: Gunes Bayir Tested-by: Arm Jenkins --- src/core/CL/ICLKernel.cpp | 3 +- src/core/CL/ICLKernel.h | 10 ++ src/core/CL/cl_kernels/helpers.h | 16 +- src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl | 190 ++++++++++++++++++++ src/core/Utils.cpp | 51 +++++- src/gpu/cl/ClKernelLibrary.cpp | 5 + src/gpu/cl/kernels/ClPool3dKernel.cpp | 225 ++++++++++++++++++++++++ src/gpu/cl/kernels/ClPool3dKernel.h | 73 ++++++++ src/gpu/cl/operators/ClPool3d.cpp | 57 ++++++ src/gpu/cl/operators/ClPool3d.h | 65 +++++++ src/runtime/CL/functions/CLPooling3dLayer.cpp | 73 ++++++++ 11 files changed, 765 insertions(+), 3 deletions(-) create mode 100644 src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl create mode 100644 src/gpu/cl/kernels/ClPool3dKernel.cpp create mode 100644 src/gpu/cl/kernels/ClPool3dKernel.h create mode 100644 src/gpu/cl/operators/ClPool3d.cpp create mode 100644 src/gpu/cl/operators/ClPool3d.h create mode 100644 src/runtime/CL/functions/CLPooling3dLayer.cpp (limited to 'src') diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp index 9bbc710c88..109a076e9a 100644 --- a/src/core/CL/ICLKernel.cpp +++ b/src/core/CL/ICLKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -173,6 +173,7 @@ template void ICLKernel::add_tensor_argument<1>(unsigned &idx, const ICLTensor * template void ICLKernel::add_tensor_argument<2>(unsigned &idx, const ICLTensor *tensor, const Window &window); template void ICLKernel::add_tensor_argument<3>(unsigned &idx, const ICLTensor *tensor, const Window &window); template void ICLKernel::add_tensor_argument<4>(unsigned &idx, const ICLTensor *tensor, const Window &window); +template void ICLKernel::add_tensor_argument<5>(unsigned &idx, const ICLTensor *tensor, const Window &window); #endif /* DOXYGEN_SKIP_THIS */ void ICLKernel::set_target(cl::Device &device) diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h index 4c8028e42a..046679e34e 100644 --- a/src/core/CL/ICLKernel.h +++ b/src/core/CL/ICLKernel.h @@ -238,6 +238,16 @@ public: { add_tensor_argument<4>(idx, tensor, window); } + /** Add the passed 5D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_5D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + add_tensor_argument<5>(idx, tensor, window); + } /** Add the passed NHW 3D tensor's parameters to the object's kernel's arguments by passing strides, dimensions and the offset to the first valid element in bytes. * diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h index bfb693e376..4018c40b16 100644 --- a/src/core/CL/cl_kernels/helpers.h +++ b/src/core/CL/cl_kernels/helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -831,6 +831,20 @@ uint name##_step_w, \ uint name##_offset_first_element_in_bytes +#define TENSOR5D_DECLARATION(name) \ + __global uchar *name##_ptr, \ + uint name##_stride_x, \ + uint name##_step_x, \ + uint name##_stride_y, \ + uint name##_step_y, \ + uint name##_stride_z, \ + uint name##_step_z, \ + uint name##_stride_w, \ + uint name##_step_w, \ + uint name##_stride_v, \ + uint name##_step_v, \ + uint name##_offset_first_element_in_bytes + #define CONVERT_TO_VECTOR_STRUCT(name) \ update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) diff --git a/src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl b/src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl new file mode 100644 index 0000000000..7c6414312f --- /dev/null +++ b/src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" +#include "tile_helpers.h" // Needed for GET_SPATIAL_IDX() + +#if defined(POOL_AVG) || defined(POOL_L2) +#define POOL_OP(x, y) ((x) + (y)) +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define POOL_OP(x, y) (fmax((x), (y))) +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#define SQRT_OP(x) sqrt((x)) + +#if defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(SRC_DEPTH) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_DEPTH) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE) + +#if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y) && defined(POOL_SIZE_Z) + +/** Performs 3d pooling layer of size equal to MxNXD. This OpenCL kernel can perform the following pooling types: + * -# max, -DPOOL_MAX must be passed at compile time + * -# average, -DPOOL_AVG must be passed at compile time. If padding has to be excluded, -DEXCLUDE_PADDING should be passed at compile time + * -# l2 normalisation, -DPOOL_L2 must be passed at compile time + * + * @note Datatype must be passed at compile type using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types are F32/F16 + * @note Accumulation data type must be passed at compile time using -DACC_DATA_TYPE e.g. -DACC_DATA_TYPE=float + * @note If -DFP_MIXED_PRECISION is passed at compile time, the kernel will use F32 for the partial result + * @note Pool size must be passed at compile time using -DPOOL_SIZE_X, -DPOOL_SIZE_Y, and -DPOOL_SIZE_Z. e.g. -DPOOL_SIZE_X=4, -DPOOL_SIZE_Y=4, -DPOOL_SIZE_Z=2 + * @note Input tensor width, height and depth must be passed at compile time using -DSRC_WIDTH, -DSRC_HEIGHT, and -DSRC_DEPTH + * @note Output tensor height, channels, depth, and batch size must be passed at compile time using -DDST_HEIGHT, -DDST_CHANNELS, -DDST_DEPTH, and -DDST_BATCH_SIZE + * @note Pool strides must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y and -DSTRIDE_Z which are the steps of the window along the x, y and z directions + * @note Pool pads must be passed at compile time using -DPAD_X, -DPAD_Y, -DPAD_Z + * @note Vector size must be passed at compile time using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 + * @note Leftover vector size must be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE_LEFTOVER=3. It is defined as the remainder between the input's first dimension and VEC_SIZE + * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0 + * + * @param[in] input_ptr Pointer to the source tensor. Supported data types: F32/F16 + * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) + * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes) + * @param[in] input_stride_v Stride of the source tensor in V dimension (in bytes) + * @param[in] input_step_v input_stride_v * number of elements along V processed per workitem(in bytes) + * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr + * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) + * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) + * @param[in] output_stride_v Stride of the destination tensor in V dimension (in bytes) + * @param[in] output_step_v output_stride_v * number of elements along V processed per workitem(in bytes) + * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +__kernel void pooling_3d_layer_MxN_ndhwc( + TENSOR5D_DECLARATION(input), + TENSOR5D_DECLARATION(output)) +{ + // Note: If C is not multiple of VEC_SIZE, we shift back of VEC_SIZE_LEFTOVER elements to compute the leftover elements for get_global_id(0) == 0 + // Note: If C is less than VEC_SIZE, VEC_SIZE should be SHRINKED to the closest smaller VEC_SIZE. This operation is performed on the host side + int idx_out_c = GET_SPATIAL_IDX(0, VEC_SIZE, VEC_SIZE_LEFTOVER); + int idx_out_w = GET_SPATIAL_IDX(1, 1, 0); + + // The depth size dimension and the batch size dimension are collapsed over the height dimension + int idx_out_h = GET_SPATIAL_IDX(2, 1, 0) % DST_HEIGHT; + int idx_out_d = (GET_SPATIAL_IDX(2, 1, 0) / DST_HEIGHT) % DST_DEPTH; + int idx_out_n = (GET_SPATIAL_IDX(2, 1, 0) / DST_HEIGHT) / DST_DEPTH; + + __global unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + idx_out_c * sizeof(DATA_TYPE) + idx_out_n * input_stride_v; + + __global unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + idx_out_c * sizeof(DATA_TYPE) + idx_out_w * output_stride_y + idx_out_h * output_stride_z + idx_out_d * + output_stride_w + idx_out_n * output_stride_v; + + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) + res0 = INITIAL_VALUE; + + int idx_in_w = idx_out_w * STRIDE_X - (int)PAD_X; + int idx_in_h = idx_out_h * STRIDE_Y - (int)PAD_Y; + int idx_in_d = idx_out_d * STRIDE_Z - (int)PAD_Z; + + // The start of width to consider in calculation should exclude padding + int pool_x_s = max((int)0, -idx_in_w); + // Assumed Symmetric Padding (left padding = right padding = PAD_X), the filter end should be either the pool width or what is remaining from current pos to the (src width + pad right) + int pool_x_e = min((int)POOL_SIZE_X, (int)SRC_WIDTH + PAD_X - idx_in_w); + int pool_y_s = max((int)0, -idx_in_h); + int pool_y_e = min((int)POOL_SIZE_Y, (int)SRC_HEIGHT + PAD_Y - idx_in_h); + int pool_z_s = max((int)0, -idx_in_d); + int pool_z_e = min((int)POOL_SIZE_Z, (int)SRC_DEPTH + PAD_Z - idx_in_d); + + // The filter size with all padding in all directions considered. + int filter_size = pool_z_e * pool_y_e * pool_x_e; + + // The end of width to consider in calculation should exclude PAD_X + pool_x_e = min(pool_x_e, SRC_WIDTH - idx_in_w); + pool_y_e = min(pool_y_e, SRC_HEIGHT - idx_in_h); + pool_z_e = min(pool_z_e, SRC_DEPTH - idx_in_d); + +#if defined(EXCLUDE_PADDING) + filter_size = (pool_z_e - pool_z_s) * (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s); +#endif // defined(EXCLUDE_PADDING) + +#if POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && POOL_SIZE_Z == SRC_DEPTH && PAD_X == 0 && PAD_Y == 0 && PAD_Z == 0 + // Global pooling path + for(int z = 0; z < POOL_SIZE_Z; ++z) + { + int depth_offset_src = (z + idx_in_d) * input_stride_w; + for(int y = 0; y < POOL_SIZE_Y; ++y) + { + int height_offset_src = (y + idx_in_h) * input_stride_z; +#pragma unroll 8 + for(int x = 0; x < POOL_SIZE_X; ++x) + { + int width_offset_src = (x + idx_in_w) * input_stride_y; +#else // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && POOL_SIZE_Z == SRC_DEPTH && PAD_X == 0 && PAD_Y == 0 && PAD_Z == 0 + for(int z = pool_z_s; z < pool_z_e; ++z) + { + int depth_offset_src = (z + idx_in_d) * input_stride_w; + for(int y = pool_y_s; y < pool_y_e; ++y) + { + int height_offset_src = (y + idx_in_h) * input_stride_z; +#pragma unroll 8 + for(int x = pool_x_s; x < pool_x_e; ++x) + { + int width_offset_src = (x + idx_in_w) * input_stride_y; +#endif // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && POOL_SIZE_Z == SRC_DEPTH && PAD_X == 0 && PAD_Y == 0 && PAD_Z == 0 + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) + data0; +#if defined(FP_MIXED_PRECISION) + // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE + data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + width_offset_src + height_offset_src + depth_offset_src)), + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); +#else // defined(FP_MIXED_PRECISION) + data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + width_offset_src + height_offset_src + depth_offset_src)); +#endif // defined(FP_MIXED_PRECISION) + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data0 *= data0; +#endif // defined(POOL_L2) + res0 = POOL_OP(res0, data0); + } + } + } + +#if defined(POOL_AVG) || defined(POOL_L2) + res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size; +#endif // defined(POOL_AVG) || defined(POOL_L2) + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res0 = SQRT_OP(res0); +#endif // defined(POOL_L2) + + // Store result +#if defined(FP_MIXED_PRECISION) + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + res_converted0 = CONVERT(res0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); + STORE_VECTOR_SELECT(res_converted, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, (VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0); +#else // defined(FP_MIXED_PRECISION) + STORE_VECTOR_SELECT(res, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, (VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0); +#endif // defined(FP_MIXED_PRECISION) +} +#endif // defined(POOL_SIZE_X) && defined(POOL_SIZE_Y) && defined(POOL_SIZE_Z) +#endif // defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(SRC_DEPTH) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_DEPTH) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE) diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index ef16cb5e75..904362e1b4 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -244,6 +244,19 @@ bool is_pool_region_entirely_outside_input(const PoolingLayerInfo &info) return pool_le_padding_x || pool_le_padding_y; } +bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info) +{ + if(info.is_global_pooling || info.pool_size.x() == 0 || info.pool_size.y() == 0 || info.pool_size.z() == 0) + { + return false; + } + const auto ps = info.padding; + const auto pool_le_padding_x = info.pool_size.x() <= std::max({ ps.left, ps.right }); + const auto pool_le_padding_y = info.pool_size.y() <= std::max({ ps.top, ps.bottom }); + const auto pool_le_padding_z = info.pool_size.z() <= std::max({ ps.front, ps.back }); + return pool_le_padding_x || pool_le_padding_y || pool_le_padding_z; +} + const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType output_stage) { static std::map output_stage_map = @@ -474,6 +487,42 @@ std::pair scaled_dimensions_signed(int width, int height, return std::make_pair(static_cast(w), static_cast(h)); } +std::tuple scaled_3d_dimensions_signed(int width, int height, int depth, + int kernel_width, int kernel_height, int kernel_depth, + const Pooling3dLayerInfo &pool3d_info) +{ + const int pad_left = pool3d_info.padding.left; + const int pad_top = pool3d_info.padding.top; + const int pad_right = pool3d_info.padding.right; + const int pad_bottom = pool3d_info.padding.bottom; + const int pad_front = pool3d_info.padding.front; + const int pad_back = pool3d_info.padding.back; + const int stride_x = pool3d_info.stride.x(); + const int stride_y = pool3d_info.stride.y(); + const int stride_z = pool3d_info.stride.z(); + int w = 0; + int h = 0; + int d = 0; + + switch(pool3d_info.round_type) + { + case DimensionRoundingType::FLOOR: + w = static_cast(std::floor((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); + h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + d = static_cast(std::floor((static_cast(depth + pad_front + pad_back - kernel_depth) / stride_z) + 1)); + break; + case DimensionRoundingType::CEIL: + w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); + h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + d = static_cast(std::ceil((static_cast(depth + pad_front + pad_back - kernel_depth) / stride_z) + 1)); + break; + default: + ARM_COMPUTE_ERROR("Unsupported rounding type"); + } + + return std::make_tuple(static_cast(w), static_cast(h), static_cast(d)); +} + bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int axis) { const bool is_min_max = (op == ReductionOperation::MAX || op == ReductionOperation::MIN); diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp index bab5342168..a5d37f49c4 100644 --- a/src/gpu/cl/ClKernelLibrary.cpp +++ b/src/gpu/cl/ClKernelLibrary.cpp @@ -437,6 +437,7 @@ const std::map ClKernelLibrary::_kernel_program_map = { "pooling_layer_MxN_nhwc", "nhwc/pooling_layer.cl" }, { "pooling_layer_2x2_nhwc", "nhwc/pooling_layer.cl" }, { "pooling_layer_MxN_quantized_nhwc", "nhwc/pooling_layer_quantized.cl" }, + { "pooling_3d_layer_MxN_ndhwc", "nhwc/pooling_3d_layer.cl" }, { "reorg_layer_nhwc", "nhwc/reorg_layer.cl" }, { "scale_nearest_neighbour_nhwc", "nhwc/scale.cl" }, { "scale_bilinear_nhwc", "nhwc/scale.cl" }, @@ -880,6 +881,10 @@ const std::map ClKernelLibrary::_program_source_map = { "nhwc/pooling_layer.cl", #include "./cl_kernels/nhwc/pooling_layer.clembed" + }, + { + "nhwc/pooling_3d_layer.cl", +#include "./cl_kernels/nhwc/pooling_3d_layer.clembed" }, { "nhwc/pooling_layer_quantized.cl", diff --git a/src/gpu/cl/kernels/ClPool3dKernel.cpp b/src/gpu/cl/kernels/ClPool3dKernel.cpp new file mode 100644 index 0000000000..929ccf7cb6 --- /dev/null +++ b/src/gpu/cl/kernels/ClPool3dKernel.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/gpu/cl/kernels/ClPool3dKernel.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +using namespace arm_compute::misc::shape_calculator; + +namespace +{ +Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const Pooling3dLayerInfo &pool_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_layout() != DataLayout::NDHWC, "Only NDHWC layout supported"); + + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_info.stride.x() == 0 || pool_info.stride.y() == 0 || pool_info.stride.z() == 0), "Strides cannot be zero."); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); + + const auto data_layout = src->data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH); + const bool is_global_pooling = pool_info.is_global_pooling; + const unsigned int pool_size_x = is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width; + const unsigned int pool_size_y = is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height; + const unsigned int pool_size_z = is_global_pooling ? src->dimension(idx_depth) : pool_info.pool_size.depth; + int output_width = 0; + int output_height = 0; + int output_depth = 0; + + bool round_type_ceil_with_asymm_padding = (pool_info.round_type == DimensionRoundingType::CEIL) && (!is_symmetric(pool_info.padding)); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(round_type_ceil_with_asymm_padding, "Cannot use dimension round type CEIL when padding is asymmetric."); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_pool_3d_region_entirely_outside_input(pool_info), "Pooling region that is entirely outside input tensor is unsupported"); + std::tie(output_width, output_height, output_depth) = scaled_3d_dimensions_signed(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height], + src->tensor_shape()[idx_depth], pool_size_x, pool_size_y, + pool_size_z, pool_info); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), "Calculated output dimension size is invalid"); + // Checks performed when dst is configured + if(dst->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); + TensorInfo out_info(TensorInfo(compute_pool3d_shape(src->tensor_shape(), pool_info), 1, dst->data_type())); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &out_info); + } + + return Status{}; +} +} // namespace + +ClPool3dKernel::ClPool3dKernel() +{ + _type = CLKernelType::POOL; +} + +void ClPool3dKernel::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const Pooling3dLayerInfo &pool_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info)); + auto padding_info = get_padding_info({ src, dst }); + + // Auto init if empty + TensorShape out_shape = compute_pool3d_shape(src->tensor_shape(), pool_info); + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(out_shape)); + + // Set instance variables + _pool_info = pool_info; + _data_layout = src->data_layout(); + + _num_elems_processed_per_iteration = (dst->data_type() == DataType::F32) ? 2 : 4; + _num_elems_processed_per_iteration = adjust_vec_size(_num_elems_processed_per_iteration, dst->dimension(0)); + + const PoolingType pool_type = pool_info.pool_type; + const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + const int idx_depth = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::DEPTH); + const int idx_channel = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); + const int idx_batch_size = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::BATCHES); + const int pool_size_x = pool_info.is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width; + const int pool_size_y = pool_info.is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height; + const int pool_size_z = pool_info.is_global_pooling ? src->dimension(idx_depth) : pool_info.pool_size.depth; + const bool exclude_padding = pool_info.exclude_padding; + const int pool_stride_x = pool_info.stride.x(); + const int pool_stride_y = pool_info.stride.y(); + const int pool_stride_z = pool_info.stride.z(); + const int pool_pad_top = pool_info.padding.top; + const int pool_pad_left = pool_info.padding.left; + const int pool_pad_front = pool_info.padding.front; + const DataType data_type = src->data_type(); + + // Set build options + CLBuildOptions build_opts; + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(_num_elems_processed_per_iteration)); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); + build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type)); + build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x)); + build_opts.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(pool_stride_y)); + build_opts.add_option("-DSTRIDE_Z=" + support::cpp11::to_string(pool_stride_z)); + build_opts.add_option("-DPAD_X=" + support::cpp11::to_string(pool_pad_left)); + build_opts.add_option("-DPAD_Y=" + support::cpp11::to_string(pool_pad_top)); + build_opts.add_option("-DPAD_Z=" + support::cpp11::to_string(pool_pad_front)); + build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x)); + build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y)); + build_opts.add_option("-DPOOL_SIZE_Z=" + support::cpp11::to_string(pool_size_z)); + build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(idx_width))); + build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(idx_height))); + build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(src->dimension(idx_depth))); + + // Set the initial value for the pooling operation accordingly with the data type + if(pool_type == PoolingType::MAX) + { + build_opts.add_option("-DINITIAL_VALUE=" + float_to_string_with_full_precision(std::numeric_limits::lowest())); + } + else + { + // Pool AVG and Pool L2 initial value + build_opts.add_option("-DINITIAL_VALUE=0"); + } + // Create kernel + // Floating point mixed precision is support on F16 only + const auto use_fp_mixed_precision = (data_type == DataType::F16) && pool_info.fp_mixed_precision && pool_type != PoolingType::MAX; + + // Wider accumulation is required to avoid accuracy loss + // Case 1: Floating point mixed precision (fp16 src data and fp32 accumulation) + DataType acc_data_type = data_type; + if(use_fp_mixed_precision) + { + acc_data_type = DataType::F32; + } + build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(acc_data_type)); + build_opts.add_option_if(use_fp_mixed_precision, "-DFP_MIXED_PRECISION"); + build_opts.add_option_if(exclude_padding, "-DEXCLUDE_PADDING"); + build_opts.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(idx_height))); + build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(dst->dimension(idx_depth))); + build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(idx_channel))); + build_opts.add_option("-DDST_BATCH_SIZE=" + support::cpp11::to_string(dst->dimension(idx_batch_size))); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % _num_elems_processed_per_iteration)); + std::string kernel_name = "pooling_3d_layer_MxN_ndhwc"; + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); + + // Configure kernel window + Window win = calculate_max_window(*dst, Steps(_num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win); + + // Set config_id for enabling LWS tuning + _config_id = "pooling_layer_3d"; + _config_id += lower_string(string_from_data_type(data_type)); + _config_id += "_"; + _config_id += lower_string(string_from_data_layout(_data_layout)); + _config_id += "_"; + _config_id += support::cpp11::to_string(dst->dimension(idx_width)); + _config_id += "_"; + _config_id += support::cpp11::to_string(dst->dimension(idx_height)); + _config_id += "_"; + _config_id += support::cpp11::to_string(dst->dimension(idx_channel)); + _config_id += "_"; + _config_id += lower_string(string_from_data_layout(src->data_layout())); + + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); +} + +Status ClPool3dKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const Pooling3dLayerInfo &pool_info) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, pool_info)); + return Status{}; +} + +void ClPool3dKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST_0)); + + // Collapse 3D window + Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + + // Set CL kernel arguments + unsigned int idx = 0; + // Passing of the window not needed, as the steps are not used for the pool3d kernel + add_5D_tensor_argument(idx, src, window); + add_5D_tensor_argument(idx, dst, window); + enqueue(queue, *this, window_collapsed, lws_hint()); +} +} // namespace kernels +} // namespace opencl +} // namespace arm_compute diff --git a/src/gpu/cl/kernels/ClPool3dKernel.h b/src/gpu/cl/kernels/ClPool3dKernel.h new file mode 100644 index 0000000000..30c76ed632 --- /dev/null +++ b/src/gpu/cl/kernels/ClPool3dKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_POOL3D_KERNEL_H +#define ARM_COMPUTE_CL_POOL3D_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/gpu/cl/ClCompileContext.h" +#include "src/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the pooling layer kernel */ +class ClPool3dKernel : public IClKernel +{ +public: + ClPool3dKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPool3dKernel); + + /** Configure kernel for a given list of arguments + * + * @note Asymmetric padding is not supported when dimension rounding type == CEIL. + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor info. Data types supported: F16/F32. + * @param[out] dst Destination tensor info. Data types supported: same as @p src. + * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo. + */ + void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const Pooling3dLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to ClPool3dKernel::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Pooling3dLayerInfo &pool_info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + Pooling3dLayerInfo _pool_info{}; + DataLayout _data_layout{ DataLayout::UNKNOWN }; + unsigned int _num_elems_processed_per_iteration{ 1 }; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_POOL3D_KERNEL_H */ diff --git a/src/gpu/cl/operators/ClPool3d.cpp b/src/gpu/cl/operators/ClPool3d.cpp new file mode 100644 index 0000000000..7dec6c5958 --- /dev/null +++ b/src/gpu/cl/operators/ClPool3d.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/gpu/cl/operators/ClPool3d.h" + +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include "src/gpu/cl/ClCompileContext.h" +#include "src/gpu/cl/kernels/ClPool3dKernel.h" + +#include "src/common/utils/Log.h" + +namespace arm_compute +{ +namespace opencl +{ +void ClPool3d::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const Pooling3dLayerInfo &info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, dst, info); + + // Configure pooling kernel + auto k = std::make_unique(); + k->set_target(CLScheduler::get().target()); + k->configure(compile_context, src, dst, info); + _kernel = std::move(k); + + // Tune kernels + CLScheduler::get().tune_kernel_static(*_kernel); +} + +Status ClPool3d::validate(const ITensorInfo *src, const ITensorInfo *dst, const Pooling3dLayerInfo &info) +{ + return kernels::ClPool3dKernel::validate(src, dst, info); +} +} // namespace opencl +} // namespace arm_compute diff --git a/src/gpu/cl/operators/ClPool3d.h b/src/gpu/cl/operators/ClPool3d.h new file mode 100644 index 0000000000..7d994fd194 --- /dev/null +++ b/src/gpu/cl/operators/ClPool3d.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_POOL3D_H +#define ARM_COMPUTE_CL_POOL3D_H + +#include "src/gpu/cl/ClCompileContext.h" +#include "src/gpu/cl/IClOperator.h" + +#include + +namespace arm_compute +{ +namespace opencl +{ +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels: + * + * -# @ref opencl::ClPool3d + */ +class ClPool3d : public IClOperator +{ +public: + /** Constructor */ + ClPool3d() = default; + /** Configure operator for a given list of arguments + * + * @note Asymmetric padding is not supported when dimension rounding type == CEIL. + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor info. + * @param[out] dst Destination tensor info. + * @param[in] info 3d Pooling layer parameters. + */ + void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const Pooling3dLayerInfo &info); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to ClPool3d::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Pooling3dLayerInfo &info); +}; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_POOL3D_H */ diff --git a/src/runtime/CL/functions/CLPooling3dLayer.cpp b/src/runtime/CL/functions/CLPooling3dLayer.cpp new file mode 100644 index 0000000000..11ae1d0fe6 --- /dev/null +++ b/src/runtime/CL/functions/CLPooling3dLayer.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLPooling3dLayer.h" + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "src/core/CL/ICLKernel.h" +#include "src/gpu/cl/operators/ClPool3d.h" + +namespace arm_compute +{ +struct CLPooling3dLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + ICLTensor *indices{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLPooling3dLayer::CLPooling3dLayer() + : _impl(std::make_unique()) +{ +} +CLPooling3dLayer::~CLPooling3dLayer() = default; + +void CLPooling3dLayer::configure(const ICLTensor *input, ICLTensor *output, const Pooling3dLayerInfo &pool_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, pool_info); +} + +void CLPooling3dLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Pooling3dLayerInfo &pool_info) +{ + _impl->src = input; + _impl->dst = output; + + _impl->op = std::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info(), pool_info); +} + +Status CLPooling3dLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const Pooling3dLayerInfo &pool_info) +{ + return opencl::ClPool3d::validate(input, output, pool_info); +} + +void CLPooling3dLayer::run() +{ + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST_0, _impl->dst); + _impl->op->run(pack); +} +} // namespace arm_compute -- cgit v1.2.1