aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Michael Tyler <michael.tyler@arm.com> 2024-06-25 14:09:37 +0100
committer Michael Tyler <michael.tyler@arm.com> 2024-06-26 15:50:40 +0000
commit e6836523ed6672ee1d622f240038a1173d57923c (patch)
tree e4158dfdc0884cd304f83907a5f63b65450512fb /src
parent 5d6fff041ade7eb44af0945867212f3979be3d3e (diff)
download ComputeLibrary-e6836523ed6672ee1d622f240038a1173d57923c.tar.gz
Optimize memory management of CPU operators
Resolves COMPMID-7172

Change-Id: I0acac5e4cb24056a88b4356d9239b33721d65d13
Signed-off-by: Michael Tyler <michael.tyler@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11762
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Suhail M <MohammedSuhail.Munshi@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/cpu/operators/CpuGemm.cpp 4
-rw-r--r-- src/cpu/operators/CpuGemmConv2d.cpp 10
-rw-r--r-- src/cpu/operators/CpuWinogradConv2d.cpp 4
-rw-r--r-- src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp 4
4 files changed, 14 insertions, 8 deletions
diff --git a/src/cpu/operators/CpuGemm.cpp b/src/cpu/operators/CpuGemm.cpp
index 905e86c185..c489b256b8 100644
--- a/src/cpu/operators/CpuGemm.cpp
+++ b/src/cpu/operators/CpuGemm.cpp
@@ -174,8 +174,8 @@ void CpuGemm::configure(const ITensorInfo *a,
// Configure rhs transpose1xw kernel
_transpose1xW_b_kernel = std::make_unique<cpu::kernels::CpuGemmTranspose1xWKernel>();
_transpose1xW_b_kernel->configure(b_to_use, &_tmp_b);
- _aux_mem[Transposed1xWRHS] =
- MemoryInfo(offset_int_vec(Transposed1xWRHS), MemoryLifetime::Persistent, _tmp_b.total_size());
+ const auto lifetime = _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary;
+ _aux_mem[Transposed1xWRHS] = MemoryInfo(offset_int_vec(Transposed1xWRHS), lifetime, _tmp_b.total_size());
// Use a and b here instead of _tmp_a and _tmp_b because CpuGemmMatrixMultiplyKernel requires the original m,n,k in case of interleaved a and transposed1xw b
const int m = a->dimension(1);
diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp
index 55d950ff4a..f3b78f8885 100644
--- a/src/cpu/operators/CpuGemmConv2d.cpp
+++ b/src/cpu/operators/CpuGemmConv2d.cpp
@@ -589,8 +589,14 @@ void CpuGemmConv2d::configure(const ITensorInfo *src,
// WeightsReshaped in prepare
// Otherwise WeightsReshaped is the final transformation of weights and needs to persist
bool gemm_trans_wei = _aux_mem[GemmAsmPretransposedRHS].size > 0;
- gemm_trans_wei = _mm_gemm != nullptr ? _aux_mem[GemmTransposed1xWRHS].size > 0 : gemm_trans_wei;
- gemm_trans_wei = _mm_gemmlowp != nullptr ? _aux_mem[GemmLowpTransposed1xWRHS].size > 0 : gemm_trans_wei;
+ if (_mm_gemm != nullptr)
+ {
+ gemm_trans_wei |= _aux_mem[GemmTransposed1xWRHS].size > 0;
+ }
+ if (_mm_gemmlowp != nullptr)
+ {
+ gemm_trans_wei |= _aux_mem[GemmLowpTransposed1xWRHS].size > 0;
+ }
_aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped),
gemm_trans_wei ? MemoryLifetime::Prepare : MemoryLifetime::Persistent,
diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp
index 7d81aee0e9..7ed2f14ac5 100644
--- a/src/cpu/operators/CpuWinogradConv2d.cpp
+++ b/src/cpu/operators/CpuWinogradConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2023 Arm Limited.
+ * Copyright (c) 2021-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -309,7 +309,7 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src,
std::max(input_workspace_size, output_workspace_size));
_aux_mem[PermutedWeights] =
MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Prepare, _weights_hwio.total_size());
- _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Persistent,
+ _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Prepare,
wds.weight_matrix_size_bytes, storage_alignment);
if (_data_layout == DataLayout::NCHW)
{
diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
index 156a798d50..785837dbc6 100644
--- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
+++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
@@ -519,8 +519,8 @@ void Fallback<TypeInput, TypeWeight, TypeOutput, OutputStage>::configure(const I
const unsigned int alignment = 128;
const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
_pretranspose_info = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8);
- _aux_mem[Pretranspose] =
- MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, alignment);
+ MemoryLifetime lifetime = _is_b_constant ? MemoryLifetime::Persistent : MemoryLifetime::Temporary;
+ _aux_mem[Pretranspose] = MemoryInfo(offset_int_vec(Pretranspose), lifetime, B_pretranspose_size, alignment);
}
// Handle indirect GEMM convolution