diff options
author | Gian Marco <gianmarco.iodice@arm.com> | 2017-12-16 19:33:50 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:33 +0000 |
commit | 1d25ed54a948639d1894c8b021940df70005d519 (patch) | |
tree | 96a29126c5b61299d64496fad7f6844412ab2cca /src/runtime/NEON/functions | |
parent | 57b20109108a90113d29d21ce7d3c873ff19749c (diff) | |
download | ComputeLibrary-1d25ed54a948639d1894c8b021940df70005d519.tar.gz |
COMPMID-759 - CLGEMM optimization for McVail benchmarks
This patch introduces an optimization for CLGEMM on Bifrost
architectures which can bring FMA utilization to 40% on
config 3 of McVail. The new CLGEMM does not require any reshape of
matrix A and matrix B.
This patch also adds the auto-config in CLConvolutionLayer and CLGEMM
and extends the interface for NEGEMM and CLGEMM.
Change-Id: Ibb354eda45e9ca64b14a99700fb21dff5989dda9
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113716
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 22 |
1 files changed, 18 insertions, 4 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 03ba43f901..e640b0604c 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -50,15 +50,17 @@ namespace arm_compute { NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _mm_optimised_kernel(nullptr), _ma_kernel(), _tmp_a(), _tmp_b(), _workspace(), - _run_vector_matrix_multiplication(false), _run_addition(false) + _run_vector_matrix_multiplication(false), _run_addition(false), _is_first_run(true), _reshape_b_only_on_first_run(false) { } -void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta) +void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16, DataType::QS8, DataType::QS16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, d); ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + ARM_COMPUTE_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported"); + ARM_COMPUTE_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported"); if(c != nullptr) { @@ -70,6 +72,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != d->info()->dimension(1), "The C matrix must have the same number of columns as the output matrix"); } + // Check if we need to reshape the matrix B only on the first run + _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; // Check if the 
first input tensor is a vector. @@ -207,8 +211,18 @@ void NEGEMM::run() // Run interleave kernel NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); - // Run transpose kernel - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + if(_is_first_run) + { + // Run transpose kernel + NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + + _is_first_run = false; + } + else if(!_reshape_b_only_on_first_run) + { + // Run transpose kernel + NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + } } NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); |