From ff0bccfb4697c591d569db9c2dc223f2e311a7d3 Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Fri, 30 Nov 2018 10:42:40 +0000 Subject: COMPMID-1497: Add support for interleaved B reshaping in gemm_interleaved Change-Id: I2171e1bf707bdcfa221c18d7a8904979e110020d Reviewed-on: https://review.mlplatform.org/326 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Reviewed-by: Georgios Pinitas --- .../NEGEMMInterleavedMatrixMultiplyWrapper.cpp | 12 +++++++++- .../NEGEMMInterleavedPrepareBWrapperKernel.cpp | 27 ++++++++++++++++++---- 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'src/core/NEON/kernels/assembly') diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.cpp b/src/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.cpp index 2c9cd320f0..3b2975dd80 100644 --- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.cpp +++ b/src/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.cpp @@ -101,6 +101,14 @@ void NEGEMMInterleavedMatrixMultiplyWrapperTemplate::create_wor using strategy = typename Kernel::strategy; unsigned int offset_transformed_b = 0; + unsigned int wl_index = 0; + unsigned int num_buffers = 0, reshaped_block_size = 0; + + if(!_b_is_pretransposed) + { + num_buffers = _transformed_b->info()->tensor_shape()[1]; + reshaped_block_size = _transformed_b->info()->tensor_shape()[0]; + } execute_window_loop(_block_walker, [&](const Coordinates & id) { const unsigned int x0 = id.x(); @@ -122,7 +130,9 @@ void NEGEMMInterleavedMatrixMultiplyWrapperTemplate::create_wor } else { - ARM_COMPUTE_ERROR("Not supported"); + // Rotate through the BufferManager's buffers: + wl_index++; + offset_transformed_b = (wl_index % num_buffers) * reshaped_block_size; } }); } diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp index 41a031c1c7..7fc57f3c02 100644 --- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp +++ b/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp @@ -35,10 +35,18 @@ namespace arm_compute namespace { // Call the lambda function for each workload generated by the passed window. -template +template void for_each_element_in_window(const Window &window, const ITensor *b, ITensor *transformed_b, unsigned int N, unsigned int K, Lambda &&lambda) { - using strategy = typename Kernel::strategy; + using strategy = typename Kernel::strategy; + unsigned int wl_index = 0; + unsigned int num_buffers = 0, reshaped_block_size = 0; + + if(use_buffer_manager) + { + num_buffers = transformed_b->info()->tensor_shape()[1]; + reshaped_block_size = transformed_b->info()->strides_in_bytes().y(); + } unsigned int offset_transformed_b = transformed_b->info()->offset_first_element_in_bytes(); execute_window_loop(window, [&](const Coordinates & coordinates) @@ -62,7 +70,16 @@ void for_each_element_in_window(const Window &window, const ITensor *b, ITensor lambda(PrepareBWorkload(offset_b, offset_transformed_b, x0, xmax, k0, kmax)); //Each workload represents one block: - offset_transformed_b += (x_size * k_size * sizeof(To)); + if(use_buffer_manager) + { + // Rotate through the BufferManager's buffers: + wl_index++; + offset_transformed_b = (wl_index % num_buffers) * reshaped_block_size; + } + else + { + offset_transformed_b += (x_size * k_size * sizeof(To)); + } }); } @@ -142,7 +159,7 @@ void NEGEMMInterleavedPrepareBWrapperKernelTemplate::transform(cons template void NEGEMMInterleavedPrepareBWrapperKernelTemplate::create_workloads(std::vector &workloads) { - for_each_element_in_window(window(), _b, _transformed_b, _Nsize, _Ksize, [&workloads](PrepareBWorkload && wl) + for_each_element_in_window(window(), _b, _transformed_b, _Nsize, _Ksize, [&workloads](PrepareBWorkload && wl) { workloads.push_back(std::move(wl)); }); @@ -152,7 +169,7 @@ template void NEGEMMInterleavedPrepareBWrapperKernelTemplate::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(window, INEKernel::window()); - for_each_element_in_window(window, _b, _transformed_b, _Nsize, _Ksize, [&](PrepareBWorkload && wl) + for_each_element_in_window(window, _b, _transformed_b, _Nsize, _Ksize, [&](PrepareBWorkload && wl) { this->transform(wl, info); }); -- cgit v1.2.1