diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-12-14 17:53:39 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:33 +0000 |
commit | 08c5a06e2b49df0d7912deedd6d26d2c603cfe58 (patch) | |
tree | 0742d970b5dff2354f917063ad11980af97dee93 /src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp | |
parent | 941cd706bea1847ae89e4ee13f144fc51050ad1f (diff) | |
download | ComputeLibrary-08c5a06e2b49df0d7912deedd6d26d2c603cfe58.tar.gz |
COMPMID-750: Fix assembly kernel interfaces
Assembly kernel interfaces were wrongly translating the layout of the
input matrices. Boolean flags transform0 and transform1 do not match the
actual interface of the gemm assembly code which expects transpose0 and
transposed1.
Change-Id: Ia4df65a533834647fa63e78e8c897924793949df
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113410
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp | 29 |
1 files changed, 15 insertions, 14 deletions
diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp index db37201687..38f82f0407 100644 --- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp +++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp @@ -56,7 +56,7 @@ NEGEMMLowpAArch64Kernel::NEGEMMLowpAArch64Kernel() { } -void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window, +void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1, const Window &window, const ThreadInfo &info) { const int lda = input0->info()->strides_in_bytes().y(); @@ -77,7 +77,7 @@ void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor * Iterator in0(input0, window); Iterator out(output, window); - GemmInterleaved<gemm_s8_4x4, int8_t, int32_t> gemm(&info.cpu_info, M, N, K, !transform_1, !transform_1); + GemmInterleaved<gemm_s8_4x4, int8_t, int32_t> gemm(&info.cpu_info, M, N, K, is_transposed_0, is_transposed_1); constexpr size_t alignment = 4096; const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id; @@ -99,7 +99,7 @@ void gemm_interleaved_s8(const ITensor *input0, const ITensor *input1, ITensor * in0, out); } -void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window, +void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1, const Window &window, const ThreadInfo &info) { const int lda = input0->info()->strides_in_bytes().y(); @@ -120,7 +120,7 @@ void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor * Iterator in0(input0, window); Iterator out(output, window); - GemmInterleaved<gemm_u8_4x4, uint8_t, uint32_t> gemm(&info.cpu_info, M, N, K, !transform_1, !transform_1); + GemmInterleaved<gemm_u8_4x4, uint8_t, uint32_t> gemm(&info.cpu_info, M, N, K, is_transposed_0, is_transposed_1); constexpr size_t alignment = 4096; const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id; @@ -142,20 +142,21 @@ void gemm_interleaved_u8(const ITensor *input0, const ITensor *input1, ITensor * in0, out); } -void NEGEMMLowpAArch64Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) +void NEGEMMLowpAArch64Kernel::internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, + bool is_transposed_1) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::U32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1); - _input0 = input0; - _input1 = input1; - _output = output; - _workspace = workspace; - _alpha = alpha; - _beta = beta; - _transform_0 = transform_0; - _transform_1 = transform_1; + _input0 = input0; + _input1 = input1; + _output = output; + _workspace = workspace; + _alpha = alpha; + _beta = beta; + _is_transposed_0 = is_transposed_0; + _is_transposed_1 = is_transposed_1; switch(input0->info()->data_type()) { @@ -192,7 +193,7 @@ void NEGEMMLowpAArch64Kernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input0, _input1, _output, _workspace, _alpha, _beta, _transform_0, _transform_1, window, info); + (*_func)(_input0, _input1, _output, _workspace, _alpha, _beta, _is_transposed_0, _is_transposed_1, window, info); } } // namespace arm_compute #endif /* ARM_COMPUTE_AARCH64_V8A */ |