From 9f7d55a3566b0f1044110000b033d663b26d3a6c Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Mon, 8 Feb 2021 13:20:24 +0000 Subject: Fix CLDepthwiseConvolutionLayer 3x3 QASYMM8 Fix errors when computing tensors with one element only - Replace Tensor3D with raw pointers so to get rid of offset to first element for NCHW layout - Add stronger out of bound constraints for NHWC layout - Set the border size to the input's padding for NHWC - Fill the strides == 0 with the largest stride, so to avoid accessing empty strides and multiplying by 0 Resolve COMPMID-4088 Change-Id: I751a4e6d7094b3c42306ff7f53af848fd35f19ac Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5024 Tested-by: Arm Jenkins Reviewed-by: Manuel Bottini Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index 1f296f8e26..de986de9f6 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -352,8 +352,8 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl::Co if(_input1->info()->num_dimensions() < 3) { - // The stride_z for matrix B must be zero if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); + // The stride_w for matrix B must be the same as stride_z if we do not slice + ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]); } const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u; -- cgit v1.2.1