aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/cpu/operators/CpuGemmConvolution.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/cpu/operators/CpuGemmConvolution.cpp')
-rw-r--r--src/runtime/cpu/operators/CpuGemmConvolution.cpp28
1 files changed, 21 insertions, 7 deletions
diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.cpp b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
index 864d7e2d0b..81d656c905 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
@@ -341,10 +341,16 @@ void CpuGemmConvolution::configure(const ITensorInfo *src, const ITensorInfo *we
_reshape_kernel->configure(gemm_output_to_use, dst);
}
+ // Check if GEMM transforms weights
+ // Modernise through COMPMID-4535
+ bool gemm_trans_wei = _aux_mem[1].size > 0; // Asm Pretranspose
+ gemm_trans_wei = _mm_gemm != nullptr ? _aux_mem[3].size > 0 : gemm_trans_wei; // Tranpose RHS
+ gemm_trans_wei = _mm_gemmlowp != nullptr ? _aux_mem[5].size > 0 : gemm_trans_wei; // Transpose RHS
+
+ // Check lifetime
_aux_mem[Im2ColOutput] = MemoryInfo(offset_int_vec(Im2ColOutput), MemoryLifetime::Temporary, _im2col_output.total_size());
- _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), MemoryLifetime::Prepare, _weights_reshaped.total_size());
+ _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), gemm_trans_wei ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, _weights_reshaped.total_size());
_aux_mem[GemmOutput] = MemoryInfo(offset_int_vec(GemmOutput), MemoryLifetime::Temporary, _gemm_output.total_size());
- _aux_mem[GemmOutput3d] = MemoryInfo(offset_int_vec(GemmOutput3d), MemoryLifetime::Temporary, _gemm_output_3d.total_size());
}
Status CpuGemmConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
@@ -493,6 +499,7 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
CpuAuxTensorHandler im2col_output(offset_int_vec(Im2ColOutput), _im2col_output, tensors, false);
CpuAuxTensorHandler gemm_output(offset_int_vec(GemmOutput), _gemm_output, tensors, false);
+ CpuAuxTensorHandler reshaped_wei(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors, false);
bool out_has_padding = _skip_col2im && (dst->info()->padding().bottom != 0 || dst->info()->padding().top != 0);
if(!_skip_im2col)
@@ -510,12 +517,15 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
// Handle the case where output has top/bottom padding
const ITensor *out_to_use = out_has_padding ? gemm_output.get() : dst;
+ Tensor gemm3d;
_gemm_output_3d.extend_padding(out_to_use->info()->padding());
- CpuAuxTensorHandler gemm_output_3d(offset_int_vec(GemmOutput3d), _gemm_output_3d, tensors, true);
- auto gemm_output_to_use = gemm_output.get();
+ gemm3d.allocator()->soft_init(_gemm_output_3d);
+ gemm3d.allocator()->import_memory(out_to_use->buffer());
+ auto gemm_output_to_use = gemm_output.get();
+
if(_skip_im2col)
{
- gemm_output_to_use = gemm_output_3d.get();
+ gemm_output_to_use = &gemm3d;
}
if(_skip_col2im && !out_has_padding)
{
@@ -525,6 +535,7 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
// Runs CpuGemm or CpuGemmLowpMatrixMultiplyCore functions
ITensorPack pack_mm = tensors;
pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use);
+ pack_mm.add_const_tensor(TensorType::ACL_SRC_1, reshaped_wei.get());
pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use);
if(_is_quantized)
{
@@ -583,10 +594,13 @@ void CpuGemmConvolution::prepare(ITensorPack &tensors)
{ TensorType::ACL_DST, weights_reshaped.get() }
};
NEScheduler::get().schedule_op(_weights_reshape_kernel.get(), 3, _weights_reshape_kernel->window(), pack);
- tensors.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
+ weights->mark_as_unused();
// Prepare GEMM
- _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);
+ ITensorPack gemm_pack = tensors;
+ gemm_pack.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
+ _is_quantized ? _mm_gemmlowp->prepare(gemm_pack) : _mm_gemm->prepare(gemm_pack);
+
_is_prepared = true;
}
}