From 932b561159cd6a8c9230bbd0343790c85755846e Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Thu, 3 May 2018 13:44:35 +0100
Subject: COMPMID-959: Perform pretranspose if allowed on NEON assembly

Change-Id: I281699ce7270aec1317c47b5a13799954cf6c9e8
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130010
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 arm_compute/runtime/NEON/functions/NEGEMM.h                              | 1 +
 arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h              | 1 +
 .../runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h        | 1 +
 arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h        | 1 +
 4 files changed, 4 insertions(+)

(limited to 'arm_compute/runtime/NEON/functions')

diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 5279995be4..e2263c2307 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -83,6 +83,7 @@ private:
     Tensor                     _tmp_a;
     Tensor                     _tmp_b;
     Tensor                     _workspace;
+    Tensor                     _B_pretransposed;
     bool                       _run_vector_matrix_multiplication;
     bool                       _run_addition;
     bool                       _is_first_run;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 24e23f133a..752693188c 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -185,6 +185,7 @@ private:
     Tensor _gemm_output;
     Tensor _tmp_output;
     Tensor _workspace;
+    Tensor _B_pretransposed;
 
     bool _append_bias;
     bool _is_fully_connected_convolution;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
index f09c94e726..11ca1bc313 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
@@ -67,6 +67,7 @@ private:
     Tensor                     _tmp_a;
     Tensor                     _tmp_b;
     Tensor                     _workspace;
+    Tensor                     _B_pretransposed;
 };
 }
 #endif /*__ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 3c9fb0ea5f..adcddb8263 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -102,6 +102,7 @@ private:
     Tensor                             _tmp_a;
     Tensor                             _tmp_b;
     Tensor                             _workspace;
+    Tensor                             _B_pretranspose;
     int32_t                            _a_offset;
     int32_t                            _b_offset;
     bool                               _run_vector_matrix_multiplication;
-- 
cgit v1.2.1