From 08c5a06e2b49df0d7912deedd6d26d2c603cfe58 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 14 Dec 2017 17:53:39 +0000 Subject: COMPMID-750: Fix assembly kernel interfaces Assembly kernel interfaces were wrongly translating the layout of the input matrices. Boolean flags transform0 and transform1 do not match the actual interface of the gemm assembly code which expects transposed0 and transposed1. Change-Id: Ia4df65a533834647fa63e78e8c897924793949df Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113410 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Pablo Tello --- src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/core/NEON/kernels/arm32') diff --git a/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp b/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp index ad0743b50f..bffcbbf436 100644 --- a/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp +++ b/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp @@ -50,20 +50,20 @@ namespace arm_compute namespace arm_compute { -void NEGEMMAArch32Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) +void NEGEMMAArch32Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output); - _input0 = input0; - _input1 = input1; - _output = output; - _workspace = workspace; - _alpha = alpha; - _beta = beta; - _transform_0 = transform_0; - _transform_1 = transform_1; + _input0 = input0; + _input1 = input1; + _output = 
output; + _workspace = workspace; + _alpha = alpha; + _beta = beta; + _is_transposed_0 = is_transposed_0; + _is_transposed_1 = is_transposed_1; // Configure kernel window Window win = calculate_max_window(*output->info()); @@ -104,7 +104,7 @@ void NEGEMMAArch32Kernel::run(const Window &window, const ThreadInfo &info) Iterator in0(_input0, window); Iterator out(_output, window); - GemmInterleaved gemm(&info.cpu_info, M, N, K, !_transform_0, !_transform_1); + GemmInterleaved gemm(&info.cpu_info, M, N, K, _is_transposed_0, _is_transposed_1); constexpr size_t alignment = 4096; const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id; void *workspace = _workspace->buffer() + offset; -- cgit v1.2.1