COMPMID-1607 - (Nightly) CLGEMMLowpMatrixMultiplyCore errors and mismatches

COMPMID-1608 - (Nightly) CLGEMMConvolutionLayer QASYMM8 errors and mismatches COMPMID-1609 - (Nightly) CLFullyConnectedLayer QASYMM8 mismatches Change-Id: I84c0d4f468be892f437f9f38b964dc7dfb66663a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150869 Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com> Tested-by: bsgcomp <bsgcomp@arm.com>
author: Isabella Gottardi <isabella.gottardi@arm.com> 2018-09-28 18:24:27 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:19 +0000
commit: b92805b2a0b8703f506fb287bcb0960232eca5bc (patch)
tree: 41b162611f83998f2d4cb551f135dd2bd4d008bd /src
parent: acaf21d7ae82e4de8da578c36e243fdc4c77c53d (diff)
download: ComputeLibrary-b92805b2a0b8703f506fb287bcb0960232eca5bc.tar.gz
3 files changed, 13 insertions, 6 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index e8124e7aa8..0fc3868341 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -91,7 +91,7 @@ __kernel void gemmlowp_mm_interleaved_transposed_midgard(IMAGE_DECLARATION(src0)
 
     // src_addr_a = address of matrix A
     // src_addr_b = address of matrix B
-    __global uchar *src_addr_a = (__global uchar *)(src0_ptr + y * src0_stride_y + src0_offset_first_element_in_bytes);
+    __global uchar *src_addr_a = (__global uchar *)(src0_ptr + z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes);
     __global uchar *src_addr_b = (__global uchar *)(src1_ptr + x * src1_stride_y + src1_offset_first_element_in_bytes);
 
 #if defined(MATRIX_B_DEPTH)
@@ -251,7 +251,7 @@ __kernel void gemmlowp_mm_interleaved_transposed_bifrost(IMAGE_DECLARATION(src0)
 
     // src_addr_a = address of matrix A
     // src_addr_b = address of matrix B
-    __global uchar *src_addr_a = (__global uchar *)(src0_ptr + y * src0_stride_y + src0_offset_first_element_in_bytes);
+    __global uchar *src_addr_a = (__global uchar *)(src0_ptr + z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes);
     __global uchar *src_addr_b = (__global uchar *)(src1_ptr + x * src1_stride_y + src1_offset_first_element_in_bytes);
 
 #if defined(MATRIX_B_DEPTH)
@@ -610,7 +610,7 @@ __kernel void gemmlowp_mm_interleaved_transposed_bifrost_dot8(IMAGE_DECLARATION(
 
     // src_addr_a = address of matrix A
     // src_addr_b = address of matrix B
-    __global uchar *src_addr_a = (__global uchar *)(src0_ptr + y * src0_stride_y + src0_offset_first_element_in_bytes);
+    __global uchar *src_addr_a = (__global uchar *)(src0_ptr + z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes);
     __global uchar *src_addr_b = (__global uchar *)(src1_ptr + x * src1_stride_y + src1_offset_first_element_in_bytes);
 
 #if defined(MATRIX_B_DEPTH)
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index cf66ebd5fe..ee364e5932 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -325,8 +325,15 @@ void CLGEMMLowpMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
 
+    if(_input1->info()->num_dimensions() < 3)
+    {
+        // The stride_z for matrix B must be zero if we do not slice
+        ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
+    }
+
     Window slice          = window.first_slice_window_3D();
     Window slice_matrix_b = slice;
+
     slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
     slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
 
@@ -350,8 +357,8 @@ void CLGEMMLowpMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue
     {
         Window slice_b = slice;
         // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
-        // This scenario can happen when the the matrix multiplication is used to perform a convolution operation
-        if(_slide_matrix_b)
+        // This scenario can happen when the matrix multiplication is used to perform a convolution operation
+        if(!_slide_matrix_b)
         {
             slice_b = slice_matrix_b;
         }
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index a3cf18a648..715edae606 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -384,7 +384,7 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que
 
     if(_reinterpret_input_as_3d)
     {
-        // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor
+        // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor
         const unsigned int idx0                  = 3 * num_arguments_per_2D_tensor() + 3;
         const unsigned int total_cross_plane_pad = _input0->info()->padding().top + _input0->info()->padding().bottom;
         _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
author	Isabella Gottardi <isabella.gottardi@arm.com>	2018-09-28 18:24:27 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:55:19 +0000
commit	b92805b2a0b8703f506fb287bcb0960232eca5bc (patch)
tree	41b162611f83998f2d4cb551f135dd2bd4d008bd /src
parent	acaf21d7ae82e4de8da578c36e243fdc4c77c53d (diff)
download	ComputeLibrary-b92805b2a0b8703f506fb287bcb0960232eca5bc.tar.gz