aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEGEMM.cpp
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2021-06-21 14:45:44 +0100
committerMichele Di Giorgio <michele.digiorgio@arm.com>2021-06-29 15:58:33 +0000
commit53832b2bcce44c71fe31a618a81765294df55750 (patch)
tree2165c4703891202186df55b772353bab8606315d /src/runtime/NEON/functions/NEGEMM.cpp
parent72b56875b9bb30a9ed1d2ad38ec51fc88e435c35 (diff)
downloadComputeLibrary-53832b2bcce44c71fe31a618a81765294df55750.tar.gz
Port NEGEMM to memory injecting interface (Part 2)
- Port NEGEMMMatrixMultiplyKernel to the new API Partially resolves: COMPMID-4402 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Change-Id: I52b67055dc24bb3a417d6ec5aeeee86e21b74320 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5873 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMM.cpp')
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp19
1 files changed, 12 insertions, 7 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 99a7db72b6..a52ca79504 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -34,9 +34,9 @@
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/CPP/Validate.h"
-#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "src/core/cpu/kernels/CpuGemmInterleave4x4Kernel.h"
#include "src/core/cpu/kernels/CpuGemmMatrixAdditionKernel.h"
+#include "src/core/cpu/kernels/CpuGemmMatrixMultiplyKernel.h"
#include "src/core/cpu/kernels/CpuGemmTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
@@ -70,7 +70,7 @@ struct NEGEMM::Impl
std::unique_ptr<cpu::kernels::CpuGemmInterleave4x4Kernel> interleave_kernel{ nullptr };
std::unique_ptr<cpu::kernels::CpuGemmTranspose1xWKernel> transpose_kernel{ nullptr };
- std::unique_ptr<NEGEMMMatrixMultiplyKernel> mm_kernel{ nullptr };
+ std::unique_ptr<cpu::kernels::CpuGemmMatrixMultiplyKernel> mm_kernel{ nullptr };
std::unique_ptr<cpu::CpuGemmAssemblyDispatch> asm_glue{ nullptr };
std::unique_ptr<cpu::kernels::CpuGemmMatrixAdditionKernel> ma_kernel{ nullptr };
std::unique_ptr<cpu::CpuActivation> alpha_scale_func{ nullptr };
@@ -167,13 +167,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_impl->memory_group.manage(&_impl->tmp_d);
}
- _impl->mm_kernel = std::make_unique<NEGEMMMatrixMultiplyKernel>();
+ _impl->mm_kernel = std::make_unique<cpu::kernels::CpuGemmMatrixMultiplyKernel>();
// Select between GEMV and GEMM
if(_impl->run_vector_matrix_multiplication)
{
// Configure the matrix multiply kernel
- _impl->mm_kernel->configure(a, b, _impl->gemm_output_to_use, alpha, false);
+ _impl->mm_kernel->configure(a->info(), b->info(), _impl->gemm_output_to_use->info(), alpha, false);
}
else
{
@@ -213,7 +213,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_impl->transpose_kernel->configure(b->info(), _impl->tmp_b.info());
// Configure matrix multiplication kernel
- _impl->mm_kernel->configure(&_impl->tmp_a, &_impl->tmp_b, _impl->gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
+ _impl->mm_kernel->configure(_impl->tmp_a.info(), _impl->tmp_b.info(), _impl->gemm_output_to_use->info(), alpha, true, GEMMReshapeInfo(m, n, k));
// Allocate once the all configure methods have been called
_impl->tmp_a.allocator()->allocate();
@@ -342,7 +342,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
// Validate matrix multiply
auto_init_if_empty(tmp_output_info, matrix_a_info->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, run_interleave_transpose, reshape_info)));
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuGemmMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info));
if(c != nullptr && gemm_info.reshape_b_only_on_first_run())
{
@@ -383,6 +383,7 @@ void NEGEMM::run()
}
else
{
+ ITensorPack mm_pack{ { ACL_SRC_0, _impl->a }, { ACL_SRC_1, _impl->original_b }, { ACL_DST, _impl->gemm_output_to_use } };
if(!_impl->run_vector_matrix_multiplication)
{
// Run interleave kernel
@@ -395,9 +396,13 @@ void NEGEMM::run()
ITensorPack transpose_pack{ { ACL_SRC, _impl->original_b }, { ACL_DST, &_impl->tmp_b } };
NEScheduler::get().schedule_op(_impl->transpose_kernel.get(), Window::DimY, _impl->transpose_kernel->window(), transpose_pack);
}
+
+ // Use reshaped matrices
+ mm_pack.add_const_tensor(ACL_SRC_0, &_impl->tmp_a);
+ mm_pack.add_const_tensor(ACL_SRC_1, &_impl->tmp_b);
}
- NEScheduler::get().schedule(_impl->mm_kernel.get(), _impl->run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
+ NEScheduler::get().schedule_op(_impl->mm_kernel.get(), _impl->run_vector_matrix_multiplication ? Window::DimX : Window::DimY, _impl->mm_kernel->window(), mm_pack);
// Run bias addition kernel
if(_impl->run_bias_addition)