From edc524ef7ed38e0521c874f28bb9a1f2407b44c6 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 10 Feb 2021 11:54:47 +0000 Subject: Revert changes on tensor's strides and fix CLDepthwiseConvolution 3x3 Quantized - Revert changes in strides > num_dimensions. Set them to 0 - Fix offset calculcation in depthwise 3x3 quantized using select and stride_y for max offset Resolve COMPMID-4254 Change-Id: Ia99b9637f18b99b1fa3d4b7b4892046027d3e7e5 Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5040 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- .../cl_kernels/depthwise_convolution_quantized.cl | 23 +++++++-------------- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 4 ++-- .../CLGEMMLowpMatrixMultiplyNativeKernel.cpp | 4 ++-- .../CLGEMMLowpMatrixMultiplyReshapedKernel.cpp | 4 ++-- ...GEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 4 ++-- .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp | 4 ++-- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 4 ++-- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 4 ++-- src/core/helpers/Utils.h | 24 ---------------------- tests/validation/UNIT/TensorInfo.cpp | 10 ++++----- 11 files changed, 28 insertions(+), 61 deletions(-) diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl index 285c00a713..c7fe401f80 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl @@ -910,9 +910,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc( // A "-1" 32 bit signed variable converted to unsigned gives 4294967295 z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); - + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) @@ -925,8 +923,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc( // Moreover z_coord cannot be out-of-bound for z = 1 so we do not need to clamp the offset z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP + DILATION_Y; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values3 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) @@ -938,8 +935,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc( // Offset can be out-of-bound so we need to check if it is greater than max_offset z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP + DILATION_Y * 2; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values6 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) @@ -1156,9 +1152,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc( // A "-1" 32 bit signed variable converted to unsigned gives 4294967295 z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); - + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) @@ -1171,8 +1165,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc( // z == 1 z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 1; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values4 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) @@ -1185,8 +1178,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc( // z == 2 z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 2; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values8 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) @@ -1199,8 +1191,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc( // z == 3 z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 3; z_coord = min((uint)z_coord, (uint)SRC_DIM_2); - offset = y_offset + (int4)(z_coord * src_stride_z); - offset = min(offset, (int4)max_offset); + offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2); VEC_TYPE(VEC_SIZE) values12 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0)); VEC_TYPE(VEC_SIZE) diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 43c3ff3bfd..d13afd2010 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -438,8 +438,8 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com // |__________________| // | pad_bottom | // |******************| - const int max_offset = _input->info()->strides_in_bytes().z() * _input->info()->dimension(2) - (_input->info()->padding().bottom + _input->info()->padding().top) * - _input->info()->strides_in_bytes().y(); + const int max_offset = ((_input->info()->dimension(1) * _input->info()->dimension(2)) + (_input->info()->padding().bottom + _input->info()->padding().top) * (_input->info()->dimension( + 2) - 1)) * _input->info()->strides_in_bytes().y(); _kernel.setArg(idx, max_offset); } diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp index 5633ee5a28..9215fd602d 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp @@ -284,8 +284,8 @@ void CLGEMMLowpMatrixMultiplyNativeKernel::run(const Window &window, cl::Command if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp index 3043e01514..848f272e50 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp @@ -254,8 +254,8 @@ void CLGEMMLowpMatrixMultiplyReshapedKernel::run(const Window &window, cl::Comma if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp index 0122e3ba4b..eba52b08bd 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -488,8 +488,8 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index a653e29f8f..6d3b1e5897 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -484,8 +484,8 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp index fefcd2f74d..f07166e4bb 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp @@ -348,8 +348,8 @@ void CLGEMMMatrixMultiplyNativeKernel::run(const Window &window, cl::CommandQueu if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp index 8a403555f5..9f1ffa48eb 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp @@ -359,8 +359,8 @@ void CLGEMMMatrixMultiplyReshapedKernel::run(const Window &window, cl::CommandQu if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index de986de9f6..3dee4f24cd 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -352,8 +352,8 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl::Co if(_input1->info()->num_dimensions() < 3) { - // The stride_w for matrix B must be the same as stride_z if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); + // The stride_z for matrix B must be zero if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u; diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h index d64eddb9aa..326dc962c7 100644 --- a/src/core/helpers/Utils.h +++ b/src/core/helpers/Utils.h @@ -50,30 +50,6 @@ inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fix strides.set(i, shape[i - 1] * strides[i - 1]); } - size_t first_zero = std::distance(strides.begin(), std::find_if(strides.begin(), strides.end(), [](uint32_t val) - { - return val == 0U; - })); - - if(first_zero > 0) - { - if(first_zero == 1) - { - strides.set(1, strides[0] * (shape[0] + info.padding().left + info.padding().right)); - ++first_zero; - } - else if(first_zero == 2) - { - strides.set(2, strides[1] * (shape[1] + info.padding().top + info.padding().bottom)); - ++first_zero; - } - - for(size_t i = first_zero; i < Strides::num_max_dimensions; ++i) - { - strides.set(i, strides[first_zero - 1]); - } - } - return strides; } diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp index 44c9342389..cf9dfeabe9 100644 --- a/tests/validation/UNIT/TensorInfo.cpp +++ b/tests/validation/UNIT/TensorInfo.cpp @@ -60,11 +60,11 @@ DATA_TEST_CASE(AutoPadding, framework::DatasetMode::ALL, zip(zip(zip( PaddingSize{ 4, 36, 4, 4 }})), framework::dataset::make("Strides", { Strides{}, - Strides{ 1U, 50U, 50U, 50U, 50U, 50U }, - Strides{ 1U, 50U, 900U, 900U, 900U, 900U }, - Strides{ 1U, 50U, 900U, 900U, 900U, 900U }, - Strides{ 1U, 50U, 900U, 9000U, 9000U, 9000U }, - Strides{ 1U, 50U, 900U, 9000U, 90000U, 90000U }, + Strides{ 1U, 50U }, + Strides{ 1U, 50U }, + Strides{ 1U, 50U, 900U }, + Strides{ 1U, 50U, 900U, 9000U }, + Strides{ 1U, 50U, 900U, 9000U, 90000U }, Strides{ 1U, 50U, 900U, 9000U, 90000U, 900000U }})), framework::dataset::make("Offset", { 0U, 4U, 204U, 204U, 204U, 204U, 204U })), shape, auto_padding, strides, offset) -- cgit v1.2.1