aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-08-12 07:42:51 +0100
committerPablo Marquez Tello <pablo.tello@arm.com>2021-08-13 08:14:39 +0000
commitd4a5bc5b87d970c196c6ea2a6b8bc2119005e0ca (patch)
tree675bb531117fa1f402ee37bfda8eb606f47a43ba
parent39aebd13faab1d9159fbd701ca0d1d77a125b6b2 (diff)
downloadComputeLibrary-d4a5bc5b87d970c196c6ea2a6b8bc2119005e0ca.tar.gz
Ensure correct transformed matrices are used in CpuGemmConvolution
Resolves: COMPMID-4763 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: Iae2e093cfb7d2c7172603897afe1c6a2e5d1caa3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/349725 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6101 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp18
-rw-r--r--src/runtime/cpu/operators/CpuGemmConvolution.cpp28
-rw-r--r--src/runtime/cpu/operators/CpuGemmConvolution.h1
-rw-r--r--src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp1
4 files changed, 24 insertions, 24 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index f63fcb02fd..c32584ec0d 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -39,7 +39,6 @@ struct NEGEMMConvolutionLayer::Impl
const ITensor *weights{ nullptr };
std::unique_ptr<cpu::CpuGemmConvolution> op{ nullptr };
ITensorPack run_pack{};
- ITensorPack prep_pack{};
MemoryGroup memory_group{};
IWeightsManager *weights_manager{ nullptr };
MemoryRequirements aux_mem_req{};
@@ -70,13 +69,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
{ TensorType::ACL_SRC_2, biases },
{ TensorType::ACL_DST, output }
};
- _impl->prep_pack =
- {
- { TensorType::ACL_SRC_1, weights },
- { TensorType::ACL_SRC_2, biases },
- };
_impl->aux_mem_req = _impl->op->workspace();
- _impl->workspace_tensors = manage_workspace<Tensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack);
+ _impl->workspace_tensors = manage_workspace<Tensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->run_pack);
}
Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -96,15 +90,7 @@ void NEGEMMConvolutionLayer::prepare()
{
if(!_impl->is_prepared)
{
- _impl->op->prepare(_impl->prep_pack);
- auto has_reshape = std::find_if(_impl->aux_mem_req.begin(),
- _impl->aux_mem_req.end(),
- [](const MemoryInfo & m) -> bool { return m.lifetime == MemoryLifetime::Persistent; });
-
- if(has_reshape != std::end(_impl->aux_mem_req))
- {
- _impl->weights->mark_as_unused();
- }
+ _impl->op->prepare(_impl->run_pack);
// Release temporary tensors that are only used in prepare stage
release_temporaries<Tensor>(_impl->aux_mem_req, _impl->workspace_tensors);
diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.cpp b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
index 864d7e2d0b..81d656c905 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
@@ -341,10 +341,16 @@ void CpuGemmConvolution::configure(const ITensorInfo *src, const ITensorInfo *we
_reshape_kernel->configure(gemm_output_to_use, dst);
}
+ // Check if GEMM transforms weights
+ // Modernise through COMPMID-4535
+ bool gemm_trans_wei = _aux_mem[1].size > 0; // Asm Pretranspose
gemm_trans_wei = _mm_gemm != nullptr ? _aux_mem[3].size > 0 : gemm_trans_wei; // Transpose RHS
+ gemm_trans_wei = _mm_gemmlowp != nullptr ? _aux_mem[5].size > 0 : gemm_trans_wei; // Transpose RHS
+
+ // Check lifetime
_aux_mem[Im2ColOutput] = MemoryInfo(offset_int_vec(Im2ColOutput), MemoryLifetime::Temporary, _im2col_output.total_size());
- _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), MemoryLifetime::Prepare, _weights_reshaped.total_size());
+ _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), gemm_trans_wei ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, _weights_reshaped.total_size());
_aux_mem[GemmOutput] = MemoryInfo(offset_int_vec(GemmOutput), MemoryLifetime::Temporary, _gemm_output.total_size());
- _aux_mem[GemmOutput3d] = MemoryInfo(offset_int_vec(GemmOutput3d), MemoryLifetime::Temporary, _gemm_output_3d.total_size());
}
Status CpuGemmConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
@@ -493,6 +499,7 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
CpuAuxTensorHandler im2col_output(offset_int_vec(Im2ColOutput), _im2col_output, tensors, false);
CpuAuxTensorHandler gemm_output(offset_int_vec(GemmOutput), _gemm_output, tensors, false);
+ CpuAuxTensorHandler reshaped_wei(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors, false);
bool out_has_padding = _skip_col2im && (dst->info()->padding().bottom != 0 || dst->info()->padding().top != 0);
if(!_skip_im2col)
@@ -510,12 +517,15 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
// Handle the case where output has top/bottom padding
const ITensor *out_to_use = out_has_padding ? gemm_output.get() : dst;
+ Tensor gemm3d;
_gemm_output_3d.extend_padding(out_to_use->info()->padding());
- CpuAuxTensorHandler gemm_output_3d(offset_int_vec(GemmOutput3d), _gemm_output_3d, tensors, true);
- auto gemm_output_to_use = gemm_output.get();
+ gemm3d.allocator()->soft_init(_gemm_output_3d);
+ gemm3d.allocator()->import_memory(out_to_use->buffer());
+ auto gemm_output_to_use = gemm_output.get();
+
if(_skip_im2col)
{
- gemm_output_to_use = gemm_output_3d.get();
+ gemm_output_to_use = &gemm3d;
}
if(_skip_col2im && !out_has_padding)
{
@@ -525,6 +535,7 @@ void CpuGemmConvolution::run(ITensorPack &tensors)
// Runs CpuGemm or CpuGemmLowpMatrixMultiplyCore functions
ITensorPack pack_mm = tensors;
pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use);
+ pack_mm.add_const_tensor(TensorType::ACL_SRC_1, reshaped_wei.get());
pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use);
if(_is_quantized)
{
@@ -583,10 +594,13 @@ void CpuGemmConvolution::prepare(ITensorPack &tensors)
{ TensorType::ACL_DST, weights_reshaped.get() }
};
NEScheduler::get().schedule_op(_weights_reshape_kernel.get(), 3, _weights_reshape_kernel->window(), pack);
- tensors.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
+ weights->mark_as_unused();
// Prepare GEMM
- _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);
+ ITensorPack gemm_pack = tensors;
+ gemm_pack.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
+ _is_quantized ? _mm_gemmlowp->prepare(gemm_pack) : _mm_gemm->prepare(gemm_pack);
+
_is_prepared = true;
}
}
diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.h b/src/runtime/cpu/operators/CpuGemmConvolution.h
index 578586e7d1..7755bbe2a2 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.h
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.h
@@ -174,7 +174,6 @@ private:
Im2ColOutput = 9,
WeightsReshaped,
GemmOutput,
- GemmOutput3d,
Count
};
diff --git a/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp b/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
index f22446863c..7affc3f506 100644
--- a/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
@@ -529,6 +529,7 @@ void CpuGemmLowpMatrixMultiplyCore::run(ITensorPack &tensors)
};
NEScheduler::get().schedule_op(_convert_to_signed_asymm.get(), Window::DimY, _convert_to_signed_asymm->window(), pack);
a_to_use = signed_a.get();
+ matrix_a = signed_a.get();
}
// Run GEMM