From 932b561159cd6a8c9230bbd0343790c85755846e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 3 May 2018 13:44:35 +0100 Subject: COMPMID-959: Perform pretranspose if allowed on NEON assembly Change-Id: I281699ce7270aec1317c47b5a13799954cf6c9e8 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130010 Tested-by: Jenkins Reviewed-by: Pablo Tello Reviewed-by: Anthony Barbier --- arm_compute/runtime/NEON/functions/NEGEMM.h | 1 + arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 1 + .../runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h | 1 + arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 1 + 4 files changed, 4 insertions(+) (limited to 'arm_compute/runtime/NEON/functions') diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 5279995be4..e2263c2307 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -83,6 +83,7 @@ private: Tensor _tmp_a; Tensor _tmp_b; Tensor _workspace; + Tensor _B_pretransposed; bool _run_vector_matrix_multiplication; bool _run_addition; bool _is_first_run; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 24e23f133a..752693188c 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -185,6 +185,7 @@ private: Tensor _gemm_output; Tensor _tmp_output; Tensor _workspace; + Tensor _B_pretransposed; bool _append_bias; bool _is_fully_connected_convolution; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h index f09c94e726..11ca1bc313 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h @@ -67,6 +67,7 @@ private: Tensor _tmp_a; Tensor _tmp_b; Tensor _workspace; + Tensor _B_pretransposed; }; } #endif /*__ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 3c9fb0ea5f..adcddb8263 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -102,6 +102,7 @@ private: Tensor _tmp_a; Tensor _tmp_b; Tensor _workspace; + Tensor _B_pretranspose; int32_t _a_offset; int32_t _b_offset; bool _run_vector_matrix_multiplication; -- cgit v1.2.1