diff options
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp | 4 | ||||
-rw-r--r-- | tests/datasets/SmallGEMMDataset.h | 3 |
2 files changed, 4 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp index 2b846c7f10..89a16f75a0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp @@ -56,8 +56,8 @@ void a64_sgemm_native_16x4(const float *A, int lda, const float *B, int ldb, flo float *c_ptr0 = C + (y * ldc); float *c_ptr1 = (activerows > 1) ? c_ptr0 + ldc : dummy_buffer; - float *c_ptr2 = (activerows > 1) ? c_ptr1 + ldc : dummy_buffer; - float *c_ptr3 = (activerows > 1) ? c_ptr2 + ldc : dummy_buffer; + float *c_ptr2 = (activerows > 2) ? c_ptr1 + ldc : dummy_buffer; + float *c_ptr3 = (activerows > 3) ? c_ptr2 + ldc : dummy_buffer; for (int x0=0; x0<N; x0+=16) { const float *a_ptr0 = a_ptr0_base; diff --git a/tests/datasets/SmallGEMMDataset.h b/tests/datasets/SmallGEMMDataset.h index 5d59c1d3b3..110868bcbd 100644 --- a/tests/datasets/SmallGEMMDataset.h +++ b/tests/datasets/SmallGEMMDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,7 @@ public: { add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), TensorShape(33U, 13U), 1.0f, 0.0f); add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U), TensorShape(23U, 1U), 1.0f, 0.0f); + add_config(TensorShape(8U, 2U), TensorShape(16U, 8U), TensorShape(16U, 2U), TensorShape(16U, 2U), 1.0f, 0.0f); add_config(TensorShape(38U, 12U), TensorShape(21U, 38U), TensorShape(21U, 12U), TensorShape(21U, 12U), 0.2f, 1.2f); add_config(TensorShape(32U, 1U), TensorShape(17U, 32U), TensorShape(17U, 1U), TensorShape(17U, 1U), 0.4f, 0.7f); } |