From e92c23eb7a91ddd12feeb40cd8bc5d766c6fe5c3 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Fri, 23 Jul 2021 20:38:47 +0100
Subject: Fix allocation of prepare tensor on ClWinogradConv2d

Preparation tensor was allocated during execution.
Avoid if GEMM used by the Winograd function is reshaped.

Resolves: COMPMID-4640

Signed-off-by: Georgios Pinitas
Change-Id: Id5b7e6bbfe9f69661baa9d42698cdc2cc1b422b2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5991
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp | 19 +++++++------------
 src/runtime/gpu/cl/utils/ClAuxTensorHandler.h     | 10 ++++++++--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp b/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
index 2ca1ff59df..07f90ddaef 100644
--- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
+++ b/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
@@ -233,37 +233,32 @@ Status ClWinogradConv2d::validate(const ITensorInfo *src, const ITensorInfo *wei
 
 void ClWinogradConv2d::run(ITensorPack &tensors)
 {
-    prepare(tensors);
+    const bool is_gemm_reshaped = _aux_mem[3].lifetime == MemoryLifetime::Prepare;
 
     auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
     auto biases = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
     auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
 
     CLAuxTensorHandler input0(offset_int_vec(2), _input0, tensors, true);
-    CLAuxTensorHandler input1(offset_int_vec(3), _input1, tensors, true);
+    CLAuxTensorHandler input1(offset_int_vec(3), _input1, tensors, true, is_gemm_reshaped);
     CLAuxTensorHandler batched_mm_output(offset_int_vec(4), _batched_mm_output, tensors, true);
 
+    prepare(tensors);
+
     // Run input transform
     ITensorPack pack_it
     {
         { TensorType::ACL_SRC, src },
         { TensorType::ACL_DST, input0.get() },
     };
-    CLScheduler::get().enqueue_op(_border_handler, pack_it);
-    CLScheduler::get().enqueue_op(*_input_transform, pack_it);
+    CLScheduler::get().enqueue_op(_border_handler, pack_it, false);
+    CLScheduler::get().enqueue_op(*_input_transform, pack_it, false);
 
     // Run batched matrix multiplication
     ITensorPack pack_mm = tensors;
     pack_mm.add_const_tensor(TensorType::ACL_SRC_0, input0.get());
     pack_mm.add_tensor(TensorType::ACL_DST, batched_mm_output.get());
-    if(_aux_mem[3].lifetime == MemoryLifetime::Prepare)
-    {
-        pack_mm.remove_tensor(TensorType::ACL_SRC_1);
-    }
-    else
-    {
-        pack_mm.add_const_tensor(TensorType::ACL_SRC_1, input1.get());
-    }
+    is_gemm_reshaped ? pack_mm.remove_tensor(TensorType::ACL_SRC_1) : pack_mm.add_const_tensor(TensorType::ACL_SRC_1, input1.get());
     _batched_mm.run(pack_mm);
 
     // Run output transform
diff --git a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h b/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
index 1cf717cf6f..af383489a1 100644
--- a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
+++ b/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
@@ -28,6 +28,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 
+#include "src/common/utils/Log.h"
 #include "support/Cast.h"
 
 namespace arm_compute
@@ -38,7 +39,7 @@ namespace opencl
 class CLAuxTensorHandler
 {
 public:
-    CLAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false)
+    CLAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false, bool bypass_alloc = false)
         : _tensor()
     {
         if(info.total_size() == 0)
@@ -50,7 +51,12 @@ public:
         ICLTensor *packed_tensor = utils::cast::polymorphic_downcast<ICLTensor *>(pack.get_tensor(slot_id));
         if((packed_tensor == nullptr) || (info.total_size() > packed_tensor->info()->total_size()))
         {
-            _tensor.allocator()->allocate();
+            if(!bypass_alloc)
+            {
+                _tensor.allocator()->allocate();
+                ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Allocating auxiliary tensor");
+            }
+
             if(pack_inject)
             {
                 pack.add_tensor(slot_id, &_tensor);
-- 
cgit v1.2.1