From 22f5ed51f1b01f7cf6993a556a0b763e437926fc Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Fri, 23 Jul 2021 18:58:43 +0100
Subject: Avoid allocation of auxiliary memory in CpuGemmConvolution

Resolves: COMPMID-4690

Signed-off-by: Georgios Pinitas
Change-Id: I2d44fd59fc66e2d3e80acffd1a130f6d3fab5c57
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5990
Tested-by: Arm Jenkins
---
 src/runtime/cpu/operators/CpuGemmConvolution.cpp            | 12 +++---------
 src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp |  1 +
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.cpp b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
index e7fae9da7b..6a78b0cf29 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
@@ -488,8 +488,6 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
     prepare(tensors);
 
     auto src = tensors.get_const_tensor(ACL_SRC_0);
-    auto weights = tensors.get_const_tensor(ACL_SRC_1);
-    auto biases = tensors.get_const_tensor(ACL_SRC_2);
     auto dst = tensors.get_tensor(ACL_DST);
 
     auto gemm_input_to_use = src;
@@ -525,13 +523,9 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
     }
 
     // Runs CpuGemm or CpuGemmLowpMatrixMultiplyCore functions
-    ITensorPack pack_mm =
-    {
-        { TensorType::ACL_SRC_0, gemm_input_to_use },
-        { TensorType::ACL_SRC_1, weights },
-        { TensorType::ACL_SRC_2, biases },
-        { TensorType::ACL_DST, gemm_output_to_use }
-    };
+    ITensorPack pack_mm = tensors;
+    pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use);
+    pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use);
     if(_is_quantized)
     {
         // Run gemmlowp
diff --git a/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp b/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
index 56eb4fbb87..8adf7047fd 100644
--- a/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
@@ -502,6 +502,7 @@ Status CpuGemmLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITens
 void CpuGemmLowpMatrixMultiplyCore::run(ITensorPack &tensors)
 {
     prepare(tensors);
+
     auto a = tensors.get_const_tensor(TensorType::ACL_SRC_0);
     auto b = tensors.get_const_tensor(TensorType::ACL_SRC_1);
     auto c = tensors.get_const_tensor(TensorType::ACL_SRC_2);
-- 
cgit v1.2.1