aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2017-12-14 17:53:39 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:42:33 +0000
commit08c5a06e2b49df0d7912deedd6d26d2c603cfe58 (patch)
tree0742d970b5dff2354f917063ad11980af97dee93 /src
parent941cd706bea1847ae89e4ee13f144fc51050ad1f (diff)
downloadComputeLibrary-08c5a06e2b49df0d7912deedd6d26d2c603cfe58.tar.gz
COMPMID-750: Fix assembly kernel interfaces
Assembly kernel interfaces were wrongly translating the layout of the input matrices. Boolean flags transform0 and transform1 do not match the actual interface of the gemm assembly code which expects transpose0 and transposed1. Change-Id: Ia4df65a533834647fa63e78e8c897924793949df Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113410 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp20
-rw-r--r--src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp20
-rw-r--r--src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp31
-rw-r--r--src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp29
-rw-r--r--src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp21
-rw-r--r--src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp21
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp9
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp2
-rw-r--r--src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp2
9 files changed, 77 insertions, 78 deletions
diff --git a/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp b/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp
index ad0743b50f..bffcbbf436 100644
--- a/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp
+++ b/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp
@@ -50,20 +50,20 @@ namespace arm_compute
namespace arm_compute
{
-void NEGEMMAArch32Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1)
+void NEGEMMAArch32Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _workspace = workspace;
- _alpha = alpha;
- _beta = beta;
- _transform_0 = transform_0;
- _transform_1 = transform_1;
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _workspace = workspace;
+ _alpha = alpha;
+ _beta = beta;
+ _is_transposed_0 = is_transposed_0;
+ _is_transposed_1 = is_transposed_1;
// Configure kernel window
Window win = calculate_max_window(*output->info());
@@ -104,7 +104,7 @@ void NEGEMMAArch32Kernel::run(const Window &window, const ThreadInfo &info)
Iterator in0(_input0, window);
Iterator out(_output, window);
- GemmInterleaved<sgemm_8x6, float, float> gemm(&info.cpu_info, M, N, K, !_transform_0, !_transform_1);
+ GemmInterleaved<sgemm_8x6, float, float> gemm(&info.cpu_info, M, N, K, _is_transposed_0, _is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
void *workspace = _workspace->buffer() + offset;
diff --git a/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp
index d70524b6b8..0eaa9aa39b 100644
--- a/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp
@@ -50,20 +50,20 @@ namespace arm_compute
namespace arm_compute
{
-void NEGEMMAArch64Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1)
+void NEGEMMAArch64Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _workspace = workspace;
- _alpha = alpha;
- _beta = beta;
- _transform_0 = transform_0;
- _transform_1 = transform_1;
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _workspace = workspace;
+ _alpha = alpha;
+ _beta = beta;
+ _is_transposed_0 = is_transposed_0;
+ _is_transposed_1 = is_transposed_1;
// Configure kernel window
Window win = calculate_max_window(*output->info());
@@ -104,7 +104,7 @@ void NEGEMMAArch64Kernel::run(const Window &window, const ThreadInfo &info)
Iterator in0(_input0, window);
Iterator out(_output, window);
- GemmInterleaved<sgemm_12x8, float, float> gemm(&info.cpu_info, M, N, K, !_transform_0, !_transform_1);
+ GemmInterleaved<sgemm_12x8, float, float> gemm(&info.cpu_info, M, N, K, _is_transposed_0, _is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
void *workspace = _workspace->buffer() + offset;
diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp
index e020cd9118..80606dcc07 100644
--- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp
@@ -56,7 +56,8 @@ NEGEMMLowpAArch64A53Kernel::NEGEMMLowpAArch64A53Kernel()
{
}
-void gemm_interleaved_s16_12x8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window,
+void gemm_interleaved_s16_12x8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1,
+ const Window &window,
const ThreadInfo &info)
{
const int lda = input0->info()->strides_in_bytes().y();
@@ -77,7 +78,7 @@ void gemm_interleaved_s16_12x8(const ITensor *input0, const ITensor *input1, ITe
Iterator in0(input0, window);
Iterator out(output, window);
- GemmInterleaved<gemm_s16_12x8, int8_t, int32_t> gemm(&info.cpu_info, M, N, K, !transform_1, !transform_1);
+ GemmInterleaved<gemm_s16_12x8, int8_t, int32_t> gemm(&info.cpu_info, M, N, K, is_transposed_0, is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
@@ -99,7 +100,8 @@ void gemm_interleaved_s16_12x8(const ITensor *input0, const ITensor *input1, ITe
in0, out);
}
-void gemm_interleaved_u16_12x8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window,
+void gemm_interleaved_u16_12x8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1,
+ const Window &window,
const ThreadInfo &info)
{
const int lda = input0->info()->strides_in_bytes().y();
@@ -120,7 +122,7 @@ void gemm_interleaved_u16_12x8(const ITensor *input0, const ITensor *input1, ITe
Iterator in0(input0, window);
Iterator out(output, window);
- GemmInterleaved<gemm_u16_12x8, uint8_t, uint32_t> gemm(&info.cpu_info, M, N, K, !transform_1, !transform_1);
+ GemmInterleaved<gemm_u16_12x8, uint8_t, uint32_t> gemm(&info.cpu_info, M, N, K, is_transposed_0, is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
@@ -142,20 +144,21 @@ void gemm_interleaved_u16_12x8(const ITensor *input0, const ITensor *input1, ITe
in0, out);
}
-void NEGEMMLowpAArch64A53Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1)
+void NEGEMMLowpAArch64A53Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0,
+ bool is_transposed_1)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _workspace = workspace;
- _alpha = alpha;
- _beta = beta;
- _transform_0 = transform_0;
- _transform_1 = transform_1;
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _workspace = workspace;
+ _alpha = alpha;
+ _beta = beta;
+ _is_transposed_0 = is_transposed_0;
+ _is_transposed_1 = is_transposed_1;
switch(input0->info()->data_type())
{
@@ -192,7 +195,7 @@ void NEGEMMLowpAArch64A53Kernel::run(const Window &window, const ThreadInfo &inf
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
- (*_func)(_input0, _input1, _output, _workspace, _alpha, _beta, _transform_0, _transform_1, window, info);
+ (*_func)(_input0, _input1, _output, _workspace, _alpha, _beta, _is_transposed_0, _is_transposed_1, window, info);
}
} // namespace arm_compute
#endif /* ARM_COMPUTE_AARCH64_V8A */
diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp
index db37201687..38f82f0407 100644
--- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp
@@ -56,7 +56,7 @@ NEGEMMLowpAArch64Kernel::NEGEMMLowpAArch64Kernel()
{
}
-void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window,
+void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1, const Window &window,
const ThreadInfo &info)
{
const int lda = input0->info()->strides_in_bytes().y();
@@ -77,7 +77,7 @@ void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor *
Iterator in0(input0, window);
Iterator out(output, window);
- GemmInterleaved<gemm_s8_4x4, int8_t, int32_t> gemm(&info.cpu_info, M, N, K, !transform_1, !transform_1);
+ GemmInterleaved<gemm_s8_4x4, int8_t, int32_t> gemm(&info.cpu_info, M, N, K, is_transposed_0, is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
@@ -99,7 +99,7 @@ void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor *
in0, out);
}
-void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window,
+void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1, const Window &window,
const ThreadInfo &info)
{
const int lda = input0->info()->strides_in_bytes().y();
@@ -120,7 +120,7 @@ void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor *
Iterator in0(input0, window);
Iterator out(output, window);
- GemmInterleaved<gemm_u8_4x4, uint8_t, uint32_t> gemm(&info.cpu_info, M, N, K, !transform_1, !transform_1);
+ GemmInterleaved<gemm_u8_4x4, uint8_t, uint32_t> gemm(&info.cpu_info, M, N, K, is_transposed_0, is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
@@ -142,20 +142,21 @@ void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor *
in0, out);
}
-void NEGEMMLowpAArch64Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1)
+void NEGEMMLowpAArch64Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0,
+ bool is_transposed_1)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::U32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _workspace = workspace;
- _alpha = alpha;
- _beta = beta;
- _transform_0 = transform_0;
- _transform_1 = transform_1;
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _workspace = workspace;
+ _alpha = alpha;
+ _beta = beta;
+ _is_transposed_0 = is_transposed_0;
+ _is_transposed_1 = is_transposed_1;
switch(input0->info()->data_type())
{
@@ -192,7 +193,7 @@ void NEGEMMLowpAArch64Kernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
- (*_func)(_input0, _input1, _output, _workspace, _alpha, _beta, _transform_0, _transform_1, window, info);
+ (*_func)(_input0, _input1, _output, _workspace, _alpha, _beta, _is_transposed_0, _is_transposed_1, window, info);
}
} // namespace arm_compute
#endif /* ARM_COMPUTE_AARCH64_V8A */
diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
index e996e571ab..099934a49d 100644
--- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
@@ -84,20 +84,21 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
namespace arm_compute
{
-void NEGEMMLowpAArch64V8P4Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1)
+void NEGEMMLowpAArch64V8P4Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0,
+ bool is_transposed_1)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _workspace = workspace;
- _alpha = alpha;
- _beta = beta;
- _transform_0 = transform_0;
- _transform_1 = transform_1;
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _workspace = workspace;
+ _alpha = alpha;
+ _beta = beta;
+ _is_transposed_0 = is_transposed_0;
+ _is_transposed_1 = is_transposed_1;
// Configure kernel window
auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
@@ -136,7 +137,7 @@ void NEGEMMLowpAArch64V8P4Kernel::run(const Window &window, const ThreadInfo &in
Iterator in0(_input0, window);
Iterator out(_output, window);
- GemmInterleaved<gemm_u8_12x8, gemm_u8_12x8::operand_type, gemm_u8_12x8::result_type> gemm(&info.cpu_info, M, N, K, !_transform_1, !_transform_1);
+ GemmInterleaved<gemm_u8_12x8, gemm_u8_12x8::operand_type, gemm_u8_12x8::result_type> gemm(&info.cpu_info, M, N, K, _is_transposed_0, _is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
diff --git a/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp b/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp
index 225630434b..38b9102c20 100644
--- a/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp
@@ -50,20 +50,21 @@ namespace arm_compute
namespace arm_compute
{
-void NEHGEMMAArch64FP16Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1)
+void NEHGEMMAArch64FP16Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0,
+ bool is_transposed_1)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _workspace = workspace;
- _alpha = alpha;
- _beta = beta;
- _transform_0 = transform_0;
- _transform_1 = transform_1;
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _workspace = workspace;
+ _alpha = alpha;
+ _beta = beta;
+ _is_transposed_0 = is_transposed_0;
+ _is_transposed_1 = is_transposed_1;
// Configure kernel window
Window win = calculate_max_window(*output->info());
@@ -105,7 +106,7 @@ void NEHGEMMAArch64FP16Kernel::run(const Window &window, const ThreadInfo &info)
Iterator in0(_input0, window);
Iterator out(_output, window);
- GemmInterleaved<hgemm_24x8, hgemm_24x8::operand_type, hgemm_24x8::result_type> gemm(&info.cpu_info, M, N, K, !_transform_0, !_transform_1);
+ GemmInterleaved<hgemm_24x8, hgemm_24x8::operand_type, hgemm_24x8::result_type> gemm(&info.cpu_info, M, N, K, _is_transposed_0, _is_transposed_1);
constexpr size_t alignment = 4096;
const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
void *workspace = _workspace->buffer() + offset;
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 25c639f7ea..5ca8eb8179 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -273,14 +273,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
_memory_group.manage(&_workspace);
// Configure matrix multiplication kernel
- if(_is_fully_connected_convolution)
- {
- _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace, 1.f, 0.f);
- }
- else
- {
- _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace);
- }
+ _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace);
_workspace.allocator()->allocate();
}
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 950f4c9899..03ba43f901 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -142,7 +142,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_memory_group.manage(&_workspace);
// Configure matrix multiplication kernel
- _mm_optimised_kernel->configure(a, b, d, &_workspace, alpha, 0.f);
+ _mm_optimised_kernel->configure(a, b, d, &_workspace, alpha, 0.f, false /* is_transposed_0 */, false /* is_transposed_1 */);
_workspace.allocator()->allocate();
}
else
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 50aa5b6d11..c4028dca1d 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -84,7 +84,7 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
// Configure matrix multiplication kernel
auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpAArch64V8P4Kernel>();
- k->configure(a, b, output, &_workspace, 1.f, 1.f);
+ k->configure(a, b, output, &_workspace, 1.f, 1.f, false, false);
_mm_kernel = std::move(k);
}
else