diff options
author | Manuel Bottini <manuel.bottini@arm.com> | 2021-06-30 18:29:18 +0100 |
---|---|---|
committer | Manuel Bottini <manuel.bottini@arm.com> | 2021-07-06 11:03:31 +0000 |
commit | 900289936c458eff95499e0a0eaba989a27aaa4d (patch) | |
tree | 305853a38fd66842d19aa1a2d1cad88a70b946bc | |
parent | 6132c7aeaf6230a4e8b074309327762a9e4be003 (diff) | |
download | ComputeLibrary-900289936c458eff95499e0a0eaba989a27aaa4d.tar.gz |
Port NEIm2ColKernel
Resolves: COMPMID-4510
Change-Id: Ia3e588f599449d975dabad4afafb2974dd44d0ad
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5899
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | Android.bp | 2 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 2 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 13 | ||||
-rw-r--r-- | docs/user_guide/release_version_and_change_log.dox | 4 | ||||
-rw-r--r-- | filelist.json | 2 | ||||
-rw-r--r-- | src/core/NEON/NEKernels.h | 1 | ||||
-rw-r--r-- | src/core/NEON/kernels/NECol2ImKernel.h | 2 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEWeightsReshapeKernel.h | 2 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuIm2ColKernel.cpp (renamed from src/core/NEON/kernels/NEIm2ColKernel.cpp) | 144 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuIm2ColKernel.h (renamed from src/core/NEON/kernels/NEIm2ColKernel.h) | 90 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 18 | ||||
-rw-r--r-- | tests/validation/NEON/Im2Col.cpp | 49 | ||||
-rw-r--r-- | tests/validation/fixtures/Im2ColFixture.h | 91 |
13 files changed, 250 insertions, 170 deletions
diff --git a/Android.bp b/Android.bp index 670f0697d7..621d013e8b 100644 --- a/Android.bp +++ b/Android.bp @@ -159,7 +159,6 @@ cc_library_static { "src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp", "src/core/NEON/kernels/NEGatherKernel.cpp", "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp", - "src/core/NEON/kernels/NEIm2ColKernel.cpp", "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp", "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp", "src/core/NEON/kernels/NELogicalKernel.cpp", @@ -275,6 +274,7 @@ cc_library_static { "src/core/cpu/kernels/CpuGemmMatrixAdditionKernel.cpp", "src/core/cpu/kernels/CpuGemmMatrixMultiplyKernel.cpp", "src/core/cpu/kernels/CpuGemmTranspose1xWKernel.cpp", + "src/core/cpu/kernels/CpuIm2ColKernel.cpp", "src/core/cpu/kernels/CpuMulKernel.cpp", "src/core/cpu/kernels/CpuPermuteKernel.cpp", "src/core/cpu/kernels/CpuPool2dKernel.cpp", diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index e409a61ba1..43f1d4cc05 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -77,7 +77,7 @@ private: } // namespace weights_transformations /** Basic function to compute a Fully Connected layer. 
This function calls the following kernels: - * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref cpu::kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer) * -# @ref NETranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) * -# @ref NEGEMM or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric) * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel or @ref NEGEMMLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr) diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index d334d518e2..655d733bd1 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -41,8 +41,14 @@ namespace arm_compute { class ITensor; class NECol2ImKernel; -class NEIm2ColKernel; class NEWeightsReshapeKernel; +namespace cpu +{ +namespace kernels +{ +class CpuIm2ColKernel; +} // namespace kernels +} // namespace cpu /** Function to reshape the weights. This function calls the following kernel: * -# @ref NEWeightsReshapeKernel @@ -152,7 +158,7 @@ private: /** Basic function to compute the convolution layer. 
This function calls the following kernels/functions: * - * -# @ref NEIm2ColKernel + * -# @ref cpu::kernels::CpuIm2ColKernel * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32) * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED) * -# @ref NEGEMMLowpOutputStage (if the data type is QASYMM8/QASYMM8_SIGNED) @@ -283,12 +289,13 @@ private: IWeightsManager *_weights_manager; NEConvolutionLayerReshapeWeights _reshape_weights; weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; - std::unique_ptr<NEIm2ColKernel> _im2col_kernel; + std::unique_ptr<cpu::kernels::CpuIm2ColKernel> _im2col_kernel; NEGEMM _mm_gemm; NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; std::unique_ptr<NECol2ImKernel> _col2im_kernel; NEReshapeLayer _reshape_layer; + const ITensor *_input; const ITensor *_original_weights; const ITensor *_original_output; diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index 0c8b57ff9f..78c13041ee 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -585,7 +585,7 @@ v20.05 Public major release - Added Bfloat16 support in: - @ref NEWeightsReshapeKernel - @ref NEConvolutionLayerReshapeWeights - - @ref NEIm2ColKernel + - NEIm2ColKernel - NEIm2Col - NEDepthConvertLayerKernel - @ref NEDepthConvertLayer @@ -1362,7 +1362,7 @@ v17.03.1 First Major public release of the sources - @ref NENormalizationLayerKernel / @ref NENormalizationLayer - NETransposeKernel / @ref NETranspose - NELogits1DMaxKernel, NELogits1DShiftExpSumKernel, NELogits1DNormKernel / @ref NESoftmaxLayer - - @ref NEIm2ColKernel, @ref NECol2ImKernel, NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer + - NEIm2ColKernel, @ref NECol2ImKernel, NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer - NEGEMMMatrixAccumulateBiasesKernel / @ref NEFullyConnectedLayer - @ref 
NEGEMMLowpMatrixMultiplyKernel / NEGEMMLowp diff --git a/filelist.json b/filelist.json index 7512ac12bd..9562cc7115 100644 --- a/filelist.json +++ b/filelist.json @@ -1330,7 +1330,7 @@ "Im2Col": { "files": { "kernel": [ - "src/core/NEON/kernels/NEIm2ColKernel.cpp" + "src/core/cpu/kernels/CpuIm2ColKernel.cpp" ] } }, diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h index 665c8c7fba..69c8d7bebc 100644 --- a/src/core/NEON/NEKernels.h +++ b/src/core/NEON/NEKernels.h @@ -47,7 +47,6 @@ #include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "src/core/NEON/kernels/NEGatherKernel.h" #include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "src/core/NEON/kernels/NEIm2ColKernel.h" #include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "src/core/NEON/kernels/NELogicalKernel.h" diff --git a/src/core/NEON/kernels/NECol2ImKernel.h b/src/core/NEON/kernels/NECol2ImKernel.h index 397bf5ab17..1976302036 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.h +++ b/src/core/NEON/kernels/NECol2ImKernel.h @@ -34,7 +34,7 @@ class ITensor; /** Kernel to perform col2im reshaping. * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref cpu::kernels::CpuIm2ColKernel. * * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: * diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.h b/src/core/NEON/kernels/NEWeightsReshapeKernel.h index 76eca9fe86..5701c84cac 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.h +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -33,7 +33,7 @@ class ITensor; /** Kernel to perform reshaping on the weights used by convolution and locally connected layer * * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. 
- * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. + * In combination with the @ref cpu::kernels::CpuIm2ColKernel can transform a convolution to a matrix multiplication. * * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: * @f[ diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/cpu/kernels/CpuIm2ColKernel.cpp index a28a77a4fb..a5dbcc29c8 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/cpu/kernels/CpuIm2ColKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/cpu/kernels/CpuIm2ColKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -42,9 +42,13 @@ #include <cstring> #include <tuple> -using namespace arm_compute; +namespace arm_compute +{ using namespace misc::shape_calculator; - +namespace cpu +{ +namespace kernels +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, @@ -75,33 +79,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c return Status{}; } -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_im2col_conv_shape(input, kernel_dims, conv_info, has_bias, dilation, false))); - - const DataLayout data_layout = input->data_layout(); - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, 
DataLayoutDimension::HEIGHT); - const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(input->dimension(width_idx), input->dimension(height_idx), - kernel_dims.width, kernel_dims.height, - conv_info, dilation); - - Window win = calculate_max_window(*input, Steps()); - win.set(width_idx, Window::Dimension(0, convolved_dims.first, 1)); - win.set(height_idx, Window::Dimension(0, convolved_dims.second, 1)); - win.set(channel_idx, Window::Dimension(0, 1, 1)); - - // The NEIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped - - return std::make_pair(Status{}, win); -} - template <typename T, bool has_pads> inline void linearize_volume_nchw(const uint8_t *const in_ptr, T *out_ptr, @@ -272,26 +249,26 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr, } // namespace template <typename T, bool has_pads, bool is_nchw> -void NEIm2ColKernel::run_im2col(const Window &window) +void CpuIm2ColKernel::run_im2col(const ITensor *src, ITensor *dst, const Window &window) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); - const int input_w = _input->info()->dimension(width_idx); - const int input_h = _input->info()->dimension(height_idx); - const int input_c = _input->info()->dimension(channel_idx); - const int input_stride_x = _input->info()->strides_in_bytes().x(); - const int input_stride_y = _input->info()->strides_in_bytes().y(); - const int 
input_stride_z = _input->info()->strides_in_bytes().z(); + const int input_w = src->info()->dimension(width_idx); + const int input_h = src->info()->dimension(height_idx); + const int input_c = src->info()->dimension(channel_idx); + const int input_stride_x = src->info()->strides_in_bytes().x(); + const int input_stride_y = src->info()->strides_in_bytes().y(); + const int input_stride_z = src->info()->strides_in_bytes().z(); const int pad_left = _conv_info.pad_left(); const int pad_top = _conv_info.pad_top(); const int stride_x = _conv_info.stride().first; const int stride_y = _conv_info.stride().second; - const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0; + const int pad_value = is_data_type_quantized(src->info()->data_type()) ? src->info()->quantization_info().uniform().offset : 0; Window window_in_out(window); // The first three dimensions of the input and output are increased by the inner loops @@ -300,8 +277,8 @@ void NEIm2ColKernel::run_im2col(const Window &window) window_in_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); // Create iterators - Iterator in(_input, window_in_out); - Iterator out(_output, window_in_out); + Iterator in(src, window_in_out); + Iterator out(dst, window_in_out); execute_window_loop(window, [&](const Coordinates & id) { @@ -310,7 +287,7 @@ void NEIm2ColKernel::run_im2col(const Window &window) // Get pointers const uint8_t *const input_ptr = in.ptr(); - auto output_ptr = reinterpret_cast<T *>(out.ptr() + (id[width_idx] + id[height_idx] * _convolved_dims.first) * _output->info()->strides_in_bytes().y()); + auto output_ptr = reinterpret_cast<T *>(out.ptr() + (id[width_idx] + id[height_idx] * _convolved_dims.first) * dst->info()->strides_in_bytes().y()); // Linearize volume if(is_nchw) @@ -354,53 +331,47 @@ void NEIm2ColKernel::run_im2col(const Window &window) in, out); } -NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), 
_convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U), _data_layout(DataLayout::UNKNOWN) -{ -} - -void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation, unsigned int num_groups) +void CpuIm2ColKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, const Size2D &dilation, unsigned int num_groups) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, kernel_dims, conv_info, has_bias, dilation, num_groups)); ARM_COMPUTE_UNUSED(num_groups); - _data_layout = input->info()->data_layout(); - const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + _data_layout = src->data_layout(); + const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); - _input = input; - _output = output; _conv_info = conv_info; _kernel_width = kernel_dims.width; _kernel_height = kernel_dims.height; _dilation = dilation; - _convolved_dims = scaled_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx), + _convolved_dims = scaled_dimensions(src->dimension(width_idx), src->dimension(height_idx), _kernel_width, _kernel_height, _conv_info, _dilation); _has_bias = has_bias; 
if(_data_layout == DataLayout::NCHW) { - switch(_input->info()->data_type()) + switch(src->data_type()) { case DataType::F32: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, true> : &NEIm2ColKernel::run_im2col<float, true, true>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, true> : &CpuIm2ColKernel::run_im2col<float, true, true>; break; #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) case DataType::BFLOAT16: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, true> : &NEIm2ColKernel::run_im2col<bfloat16, true, true>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, true> : &CpuIm2ColKernel::run_im2col<bfloat16, true, true>; break; #endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, true> : &NEIm2ColKernel::run_im2col<float16_t, true, true>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, true> : &CpuIm2ColKernel::run_im2col<float16_t, true, true>; break; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QASYMM8_SIGNED: case DataType::QASYMM8: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<qasymm8_t, false, true> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, true>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<qasymm8_t, false, true> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, true>; break; default: ARM_COMPUTE_ERROR("Data type not supported"); @@ -409,26 +380,26 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size } else { - switch(_input->info()->data_type()) + switch(src->data_type()) { case DataType::F32: - _func = (!conv_info.has_padding()) ? 
&NEIm2ColKernel::run_im2col<float, false, false> : &NEIm2ColKernel::run_im2col<float, true, false>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, false> : &CpuIm2ColKernel::run_im2col<float, true, false>; break; #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) case DataType::BFLOAT16: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, false> : &NEIm2ColKernel::run_im2col<bfloat16, true, false>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, false> : &CpuIm2ColKernel::run_im2col<bfloat16, true, false>; break; #endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, false> : &NEIm2ColKernel::run_im2col<float16_t, true, false>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, false> : &CpuIm2ColKernel::run_im2col<float16_t, true, false>; break; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QASYMM8: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<uint8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>; + _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<uint8_t, false, false> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>; break; case DataType::QASYMM8_SIGNED: - _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<int8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>; + _func = (!conv_info.has_padding()) ? 
&CpuIm2ColKernel::run_im2col<int8_t, false, false> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>; break; default: ARM_COMPUTE_ERROR("Data type not supported"); @@ -436,25 +407,42 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size } } + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_im2col_conv_shape(src, kernel_dims, conv_info, has_bias, dilation, false))); + + std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(src->dimension(width_idx), src->dimension(height_idx), + kernel_dims.width, kernel_dims.height, + conv_info, dilation); + + Window win = calculate_max_window(*src, Steps()); + win.set(width_idx, Window::Dimension(0, convolved_dims.first, 1)); + win.set(height_idx, Window::Dimension(0, convolved_dims.second, 1)); + win.set(channel_idx, Window::Dimension(0, 1, 1)); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - INEKernel::configure(win_config.second); + ICpuKernel::configure(win); } -Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation, unsigned int num_groups) +Status CpuIm2ColKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, const Size2D &dilation, unsigned int num_groups) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), kernel_dims, conv_info, has_bias, dilation).first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, kernel_dims, conv_info, has_bias, 
dilation, num_groups)); return Status{}; } -void NEIm2ColKernel::run(const Window &window, const ThreadInfo &info) +void CpuIm2ColKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); - (this->*_func)(window); + auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); + (this->*_func)(src, dst, window); +} +const char *CpuIm2ColKernel::name() const +{ + return "CpuIm2ColKernel"; } +} // namespace kernels +} // namespace cpu +} // namespace arm_compute
\ No newline at end of file diff --git a/src/core/NEON/kernels/NEIm2ColKernel.h b/src/core/cpu/kernels/CpuIm2ColKernel.h index 6c1c631d82..4301a237fe 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.h +++ b/src/core/cpu/kernels/CpuIm2ColKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,16 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H -#define ARM_COMPUTE_NEIM2COLKERNEL_H +#ifndef ARM_COMPUTE_CPU_IM2COL_KERNEL_H +#define ARM_COMPUTE_CPU_IM2COL_KERNEL_H -#include "src/core/NEON/INEKernel.h" +#include "arm_compute/core/Size2D.h" +#include "src/core/common/Macros.h" +#include "src/core/cpu/ICpuKernel.h" namespace arm_compute { class ITensor; -class Size2D; - +namespace cpu +{ +namespace kernels +{ /** Interface for the im2col reshape kernel. * * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. 
@@ -54,86 +58,66 @@ class Size2D; * \end{array} \right) * @f] */ -class NEIm2ColKernel : public INEKernel +class CpuIm2ColKernel : public ICpuKernel { public: - const char *name() const override - { - return "NEIm2ColKernel"; - } /** Default constructor */ - NEIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel(const NEIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - NEIm2ColKernel(NEIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; - /** Default destructor */ - ~NEIm2ColKernel() = default; - + CpuIm2ColKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuIm2ColKernel); /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * @param[in] src The input tensor info to convert. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[out] dst The output tensor info. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + void configure(ITensorInfo *src, ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is not supported + * Similar to CpuIm2ColKernel::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; private: /** Template function to run im2col * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in] src The input tensor info + * @param[out] dst The output tensor info + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). */ template <typename T, bool has_pads, bool is_nchw> - void run_im2col(const Window &window); + void run_im2col(const ITensor *src, ITensor *dst, const Window &window); /** Common signature for all the specialised im2col functions * * @param[in] window Region on which to execute the kernel. 
*/ - using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); + using Im2ColFunctionPtr = void (CpuIm2ColKernel::*)(const ITensor *src, ITensor *dst, const Window &window); - Im2ColFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - std::pair<unsigned int, unsigned int> _convolved_dims; - PadStrideInfo _conv_info; - unsigned int _kernel_width; - unsigned int _kernel_height; - bool _has_bias; - Size2D _dilation; - DataLayout _data_layout; + Im2ColFunctionPtr _func{ nullptr }; + std::pair<unsigned int, unsigned int> _convolved_dims{}; + PadStrideInfo _conv_info{}; + unsigned int _kernel_width{ 0 }; + unsigned int _kernel_height{ 0 }; + bool _has_bias{ false }; + Size2D _dilation{ 1U, 1U }; + DataLayout _data_layout{ DataLayout::UNKNOWN }; }; +} // namespace kernels +} // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */ +#endif /*ARM_COMPUTE_CPU_IM2COL_KERNEL_H */ diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index f40cbda779..f333364289 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -31,8 +31,8 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/NEON/kernels/NECol2ImKernel.h" -#include "src/core/NEON/kernels/NEIm2ColKernel.h" #include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "src/core/cpu/kernels/CpuIm2ColKernel.h" #include <set> #include <tuple> @@ -99,7 +99,7 @@ NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default; NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), - _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), 
_original_output(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _tmp_output(), + _col2im_kernel(), _reshape_layer(), _input(nullptr), _original_weights(nullptr), _original_output(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_prepared(false) { } @@ -269,6 +269,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig const unsigned int kernel_width = weights->info()->dimension(idx_width); const unsigned int kernel_height = weights->info()->dimension(idx_height); + _input = input; _is_prepared = weights_info.retain_internal_weights(); _original_weights = weights; _original_output = output; @@ -332,8 +333,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _memory_group.manage(&_im2col_output); // Configure - _im2col_kernel = std::make_unique<NEIm2ColKernel>(); - _im2col_kernel->configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation); + _im2col_kernel = std::make_unique<cpu::kernels::CpuIm2ColKernel>(); + _im2col_kernel->configure(input->info(), _im2col_output.info(), Size2D(kernel_width, kernel_height), conv_info, false, dilation); // Update GEMM input gemm_input_to_use = &_im2col_output; @@ -521,7 +522,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI im2col_reshaped_info = TensorInfo(shape_im2col, 1, data_type); im2col_reshaped_info.set_quantization_info(input->quantization_info()); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation)); + ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation)); gemm_input_to_use = 
&im2col_reshaped_info; } @@ -563,7 +564,12 @@ void NEGEMMConvolutionLayer::run() { // Run input reshaping unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); - NEScheduler::get().schedule(_im2col_kernel.get(), y_dim); + ITensorPack pack = + { + { TensorType::ACL_SRC, _input }, + { TensorType::ACL_DST, &_im2col_output } + }; + NEScheduler::get().schedule_op(_im2col_kernel.get(), y_dim, _im2col_kernel->window(), pack); } // Handle the case where output has top/bottom padding diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp index 156957a601..f338675346 100644 --- a/tests/validation/NEON/Im2Col.cpp +++ b/tests/validation/NEON/Im2Col.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/cpu/kernels/CpuIm2ColKernel.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/datasets/ShapeDatasets.h" @@ -57,7 +57,7 @@ const auto conv_args_small = combine(combine(combine(combine(conv_filter TEST_SUITE(NEON) TEST_SUITE(Im2Col) -using NEIm2Col = NESynthetizeFunction<NEIm2ColKernel>; +using CpuIm2Col = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuIm2ColKernel>; // *INDENT-OFF* // clang-format off @@ -78,26 +78,26 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Expected", { false, false, false, false, true })), input_info, output_info, has_bias, expected) { - bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias)); + bool status = bool(cpu::kernels::CpuIm2ColKernel::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias)); ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* template <typename T> -using NEIm2ColFixture = Im2ColValidationFixture<Tensor, Accessor, NEIm2Col, T, false>; +using 
CpuIm2ColFixture = Im2ColOpValidationFixture<Tensor, Accessor, CpuIm2Col, T, false>; TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F32)), - conv_args_small)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F32)), + conv_args_small)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", - DataType::F32)), - conv_args)) +FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", + DataType::F32)), + conv_args)) { // Validate output validate(Accessor(_target), _reference); @@ -107,15 +107,15 @@ TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)), - conv_args_small)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)), + conv_args_small)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", - DataType::F16)), - conv_args)) +FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, 
combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", + DataType::F16)), + conv_args)) { // Validate output validate(Accessor(_target), _reference); @@ -127,15 +127,15 @@ TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::QASYMM8)), - conv_args_small)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::QASYMM8)), + conv_args_small)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), - framework::dataset::make("DataType", DataType::QASYMM8)), - conv_args)) +FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), + framework::dataset::make("DataType", DataType::QASYMM8)), + conv_args)) { // Validate output validate(Accessor(_target), _reference); @@ -165,8 +165,8 @@ TEST_CASE(PaddedChannelNHWC, framework::DatasetMode::PRECOMMIT) Tensor dst_target = create_tensor<Tensor>(dst_shape, data_type, 1, qinfo); // Configure target function - NEIm2Col im2col_func; - im2col_func.configure(&src_target, &dst_target, spatial_kernel, conv_info, has_bias); + CpuIm2Col im2col_func; + im2col_func.configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias); // Extend padding src_target.info()->extend_padding(PaddingSize(3, 5, 9, 1)); @@ -185,8 +185,13 @@ TEST_CASE(PaddedChannelNHWC, framework::DatasetMode::PRECOMMIT) // Fill target source library->fill_tensor_uniform(Accessor(src_target), 0); + ITensorPack pack = + { + { 
TensorType::ACL_SRC, &src_target }, + { TensorType::ACL_DST, &dst_target } + }; // Run target function - im2col_func.run(); + im2col_func.run(pack); // Calculate Reference SimpleTensor<float> src_ref{ src_shape, data_type, 1, qinfo, data_layout }; diff --git a/tests/validation/fixtures/Im2ColFixture.h b/tests/validation/fixtures/Im2ColFixture.h index b1fbd76eb2..38970116f6 100644 --- a/tests/validation/fixtures/Im2ColFixture.h +++ b/tests/validation/fixtures/Im2ColFixture.h @@ -45,6 +45,97 @@ namespace validation using namespace arm_compute::misc::shape_calculator; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z> +class Im2ColOpValidationFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(TensorShape input_shape, DataType data_type, const Size2D &kernel_dims, const PadStrideInfo &conv_info, const QuantizationInfo &quant_info, const DataLayout &data_layout, + unsigned int num_groups) + { + _kernel_dims = kernel_dims; + _conv_info = conv_info; + _quant_info = quant_info; + _data_layout = data_layout; + _has_bias = data_type != DataType::QASYMM8; + _num_groups = num_groups; + + if(_data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + } + + TensorInfo input_info(input_shape, 1, data_type); + input_info.set_data_layout(_data_layout); + + const TensorShape output_shape = compute_im2col_conv_shape(&input_info, _kernel_dims, _conv_info, _has_bias, Size2D(1U, 1U), batch_size_on_z && _num_groups == 1, _num_groups); + _target = compute_target(input_shape, output_shape, data_type); + + compute_reference(input_shape, output_shape, data_type); + } + +protected: + template <typename U> + void fill(U &&tensor) + { + library->fill_tensor_uniform(tensor, 0); + } + + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type) + { + // Create tensors + TensorType src = 
create_tensor<TensorType>(input_shape, data_type, 1, _quant_info, _data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, _quant_info); + + // Create and configure function + FunctionType im2col_func; + im2col_func.configure(src.info(), dst.info(), _kernel_dims, _conv_info, _has_bias, Size2D(1U, 1U), _num_groups); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src)); + + arm_compute::ITensorPack pack = + { + { arm_compute::TensorType::ACL_SRC, &src }, + { arm_compute::TensorType::ACL_DST, &dst } + }; + // Compute function + im2col_func.run(pack); + + return dst; + } + + void compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type) + { + // Create reference + SimpleTensor<T> src{ input_shape, data_type, 1, _quant_info, _data_layout }; + _reference = SimpleTensor<T>(output_shape, data_type, 1, _quant_info, DataLayout::NCHW); + + // Fill reference + fill(src); + + reference::im2col<T>(src, _reference, _kernel_dims, _conv_info, _has_bias, _num_groups); + } + TensorType _target{}; + SimpleTensor<T> _reference{}; + Size2D _kernel_dims{}; + PadStrideInfo _conv_info{}; + DataLayout _data_layout{}; + QuantizationInfo _quant_info{}; + bool _has_bias{}; + unsigned int _num_groups{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z> class Im2ColValidationFixture : public framework::Fixture { public: |