From e6836523ed6672ee1d622f240038a1173d57923c Mon Sep 17 00:00:00 2001 From: Michael Tyler Date: Tue, 25 Jun 2024 14:09:37 +0100 Subject: Optimize memory management of CPU operators Resolves COMPMID-7172 Change-Id: I0acac5e4cb24056a88b4356d9239b33721d65d13 Signed-off-by: Michael Tyler Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11762 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Suhail M Comments-Addressed: Arm Jenkins --- src/cpu/operators/CpuGemm.cpp | 4 ++-- src/cpu/operators/CpuGemmConv2d.cpp | 10 ++++++++-- src/cpu/operators/CpuWinogradConv2d.cpp | 4 ++-- src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp | 4 ++-- 4 files changed, 14 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/cpu/operators/CpuGemm.cpp b/src/cpu/operators/CpuGemm.cpp index 905e86c185..c489b256b8 100644 --- a/src/cpu/operators/CpuGemm.cpp +++ b/src/cpu/operators/CpuGemm.cpp @@ -174,8 +174,8 @@ void CpuGemm::configure(const ITensorInfo *a, // Configure rhs transpose1xw kernel _transpose1xW_b_kernel = std::make_unique(); _transpose1xW_b_kernel->configure(b_to_use, &_tmp_b); - _aux_mem[Transposed1xWRHS] = - MemoryInfo(offset_int_vec(Transposed1xWRHS), MemoryLifetime::Persistent, _tmp_b.total_size()); + const auto lifetime = _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary; + _aux_mem[Transposed1xWRHS] = MemoryInfo(offset_int_vec(Transposed1xWRHS), lifetime, _tmp_b.total_size()); // Use a and b here instead of _tmp_a and _tmp_b because CpuGemmMatrixMultiplyKernel requires the original m,n,k in case of interleaved a and transposed1xw b const int m = a->dimension(1); diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp index 55d950ff4a..f3b78f8885 100644 --- a/src/cpu/operators/CpuGemmConv2d.cpp +++ b/src/cpu/operators/CpuGemmConv2d.cpp @@ -589,8 +589,14 @@ void CpuGemmConv2d::configure(const ITensorInfo *src, // WeightsReshaped in prepare // Otherwise WeightsReshaped is the final transformation of weights and needs to persist bool gemm_trans_wei = _aux_mem[GemmAsmPretransposedRHS].size > 0; - gemm_trans_wei = _mm_gemm != nullptr ? _aux_mem[GemmTransposed1xWRHS].size > 0 : gemm_trans_wei; - gemm_trans_wei = _mm_gemmlowp != nullptr ? _aux_mem[GemmLowpTransposed1xWRHS].size > 0 : gemm_trans_wei; + if (_mm_gemm != nullptr) + { + gemm_trans_wei |= _aux_mem[GemmTransposed1xWRHS].size > 0; + } + if (_mm_gemmlowp != nullptr) + { + gemm_trans_wei |= _aux_mem[GemmLowpTransposed1xWRHS].size > 0; + } _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), gemm_trans_wei ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp index 7d81aee0e9..7ed2f14ac5 100644 --- a/src/cpu/operators/CpuWinogradConv2d.cpp +++ b/src/cpu/operators/CpuWinogradConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -309,7 +309,7 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src, std::max(input_workspace_size, output_workspace_size)); _aux_mem[PermutedWeights] = MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Prepare, _weights_hwio.total_size()); - _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Persistent, + _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Prepare, wds.weight_matrix_size_bytes, storage_alignment); if (_data_layout == DataLayout::NCHW) { diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index 156a798d50..785837dbc6 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -519,8 +519,8 @@ void Fallback::configure(const I const unsigned int alignment = 128; const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size(); _pretranspose_info = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8); - _aux_mem[Pretranspose] = - MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, alignment); + MemoryLifetime lifetime = _is_b_constant ? MemoryLifetime::Persistent : MemoryLifetime::Temporary; + _aux_mem[Pretranspose] = MemoryInfo(offset_int_vec(Pretranspose), lifetime, B_pretranspose_size, alignment); } // Handle indirect GEMM convolution -- cgit v1.2.1