diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-05-18 10:46:00 +0100 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2021-05-27 16:33:44 +0000 |
commit | b3be45759bdd0749ae3a16fe470820f0d9830ea9 (patch) | |
tree | 10bb8c1c0a049a23c00781c64e993f1b197c0d05 /src/runtime/NEON | |
parent | bc91297c865808ed2c321febc405179f63195ff8 (diff) | |
download | ComputeLibrary-b3be45759bdd0749ae3a16fe470820f0d9830ea9.tar.gz |
Implement memory injection in CpuDirectGemmConv2d
The following operators are now stateless by implementing
memory injection.
- CpuDirectGemmConv2d
- CpuGemmAssemblyDispatch
A test case is added to test if CpuDirectGemmConv2d can
run on different group of tensors with a single configure.
Resolves: COMPMID-4506
Change-Id: I48f44ed41236ca7e18da2de07bdbacc9007a3c5e
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5718
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 19 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConv2d.cpp | 21 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 31 |
3 files changed, 55 insertions, 16 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 7318c3e492..b526874790 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -38,6 +38,7 @@ #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/MemoryHelpers.h" #include "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.h" #include <cmath> @@ -46,6 +47,14 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { +using WorkspaceDataType = WorkspaceData<Tensor>; + +struct NEGEMM::AsmGlueTensors +{ + ITensorPack tensors{}; + WorkspaceDataType ws{}; +}; + namespace { cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) @@ -63,7 +72,7 @@ cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<cpu::CpuGemmAssemblyDispatch>()), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), - _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false) + _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _asm_glue_tensors(std::make_unique<AsmGlueTensors>()) { } @@ -94,7 +103,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _asm_glue->configure(a->info(), b->info(), c_info_to_use, d->info(), asm_info); ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured()); - _asm_glue_tensors = + _asm_glue_tensors->tensors = { { ACL_SRC_0, a }, { ACL_SRC_1, b }, @@ -102,6 +111,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe { ACL_DST, d }, }; + _asm_glue_tensors->ws = manage_workspace<Tensor>(_asm_glue->workspace(), _memory_group, _asm_glue_tensors->tensors); + // Scale product by alpha if(_run_alpha_scale) { @@ -323,7 +334,7 @@ void NEGEMM::run() if(_asm_glue->is_configured()) { - _asm_glue->run(_asm_glue_tensors); + _asm_glue->run(_asm_glue_tensors->tensors); if(_run_alpha_scale) { _alpha_scale_func.run(); @@ -377,7 +388,7 @@ void NEGEMM::prepare() ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue->prepare(_asm_glue_tensors); + _asm_glue->prepare(_asm_glue_tensors->tensors); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); diff --git a/src/runtime/NEON/functions/NEGEMMConv2d.cpp b/src/runtime/NEON/functions/NEGEMMConv2d.cpp index 94ceb6d27c..790543a34a 100644 --- a/src/runtime/NEON/functions/NEGEMMConv2d.cpp +++ b/src/runtime/NEON/functions/NEGEMMConv2d.cpp @@ -26,24 +26,37 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/MemoryHelpers.h" #include "src/runtime/cpu/operators/CpuGemmDirectConv2d.h" #include <set> namespace arm_compute { -using OperatorType = cpu::CpuGemmDirectConv2d; +using OperatorType = cpu::CpuGemmDirectConv2d; +using WorkspaceDataType = WorkspaceData<Tensor>; struct NEGEMMConv2d::Impl { ITensorPack tensors{}; + MemoryGroup mg{}; std::unique_ptr<OperatorType> op{ nullptr }; + WorkspaceDataType ws{}; + + void allocate_and_add_workspace() + { + if(op) + { + ws = manage_workspace<Tensor>(op->workspace(), mg, tensors); + } + } }; NEGEMMConv2d::NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager) : _impl(std::make_unique<Impl>()) { - _impl->op = std::make_unique<OperatorType>(memory_manager); + _impl->op = std::make_unique<OperatorType>(); + _impl->mg = MemoryGroup(memory_manager); } NEGEMMConv2d::~NEGEMMConv2d() = default; @@ -55,7 +68,9 @@ void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITens _impl->tensors.add_const_tensor(TensorType::ACL_SRC_2, biases); _impl->tensors.add_tensor(TensorType::ACL_DST, output); - _impl->op->configure(input->info(), weights->info(), biases->info(), output->info(), info); + _impl->op->configure(input->info(), weights->info(), ((biases) ? biases->info() : nullptr), output->info(), info); + + _impl->allocate_and_add_workspace(); } Status NEGEMMConv2d::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info) diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index cc0f20e695..d42e656e0c 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -42,10 +42,17 @@ #include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/helpers/MemoryHelpers.h" #include "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.h" namespace arm_compute { +using WorkspaceDataType = WorkspaceData<Tensor>; +struct NEGEMMLowpMatrixMultiplyCore::AsmGlueTensors +{ + ITensorPack tensors{}; + WorkspaceDataType ws{}; +}; namespace { cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) @@ -66,11 +73,11 @@ using namespace arm_compute::misc::shape_calculator; NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default; NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(std::make_unique<cpu::CpuGemmAssemblyDispatch>(memory_manager, weights_manager)), _mm_kernel(), _mtx_a_reshape_kernel(), + : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(std::make_unique<cpu::CpuGemmAssemblyDispatch>(weights_manager)), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(), _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), - _run_activation(false), _flip_signedness(false) + _run_activation(false), _flip_signedness(false), _asm_glue_tensors(std::make_unique<AsmGlueTensors>()) { } @@ -149,18 +156,24 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, auto c_info_to_use = c == nullptr ? nullptr : c->info(); _asm_glue->configure(a_to_use->info(), b->info(), c_info_to_use, output->info(), asm_info); _fused_assembly_path = _asm_glue->is_configured(); - _asm_glue_tensors.add_const_tensor(TensorType::ACL_SRC_2, c); - _asm_glue_tensors.add_tensor(TensorType::ACL_DST, output); + _asm_glue_tensors->tensors.add_const_tensor(TensorType::ACL_SRC_2, c); + _asm_glue_tensors->tensors.add_tensor(TensorType::ACL_DST, output); } else { auto output_to_use = (_fuse_output_stage ? &_mm_result_s32 : output); _asm_glue->configure(a_to_use->info(), b->info(), nullptr, output_to_use->info(), asm_info); - _asm_glue_tensors.add_tensor(TensorType::ACL_DST, output_to_use); + _asm_glue_tensors->tensors.add_tensor(TensorType::ACL_DST, output_to_use); } _assembly_path = _asm_glue->is_configured(); - _asm_glue_tensors.add_const_tensor(TensorType::ACL_SRC_0, a_to_use); - _asm_glue_tensors.add_const_tensor(TensorType::ACL_SRC_1, b); + _asm_glue_tensors->tensors.add_const_tensor(TensorType::ACL_SRC_0, a_to_use); + _asm_glue_tensors->tensors.add_const_tensor(TensorType::ACL_SRC_1, b); + + if(_assembly_path) + { + _asm_glue_tensors->ws = manage_workspace<Tensor>(_asm_glue->workspace(), _memory_group, _asm_glue_tensors->tensors); + } + break; } default: @@ -520,7 +533,7 @@ void NEGEMMLowpMatrixMultiplyCore::run() // Run GEMM if(_asm_glue->is_configured()) { - _asm_glue->run(_asm_glue_tensors); + _asm_glue->run(_asm_glue_tensors->tensors); } else { @@ -590,7 +603,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue->prepare(_asm_glue_tensors); + _asm_glue->prepare(_asm_glue_tensors->tensors); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); |