From 08a4517905da959b6e3401cc24f5e2018f9b51ac Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Fri, 30 Nov 2018 17:20:26 +0000 Subject: COMPMID-1497: Use reshaped B strategy in GEMM by default. NELSTM, NEFullyConnectedLayer (for quantised types only), NERNN and NEWinogradLayer were all defaulting to on-the-fly reshaping of B. Fixed a bug in GemmInterleaved: it was ignoring the 'multis' dimension of the tensor when allocating the memory for the reshaped B. Change-Id: I7b30f7f57fc65d6a03cccde0bf5515a811f17b54 Reviewed-on: https://review.mlplatform.org/323 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/core/NEON') diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp index 6c201cedb3..41a031c1c7 100644 --- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp +++ b/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp @@ -68,7 +68,7 @@ void for_each_element_in_window(const Window &window, const ITensor *b, ITensor // Calculate the size of transformed_b: template -unsigned int get_B_pretransposed_array_size(unsigned int N, unsigned int K, const BlockSizes &bs) +unsigned int get_B_pretransposed_array_size(unsigned int N, unsigned int K, const BlockSizes &bs, unsigned int multis) { using strategy = typename Kernel::strategy; @@ -89,6 +89,9 @@ unsigned int get_B_pretransposed_array_size(unsigned int N, unsigned int K, cons // Calculate the total size of the buffer: size_t total = num_full_k * normal_k_size * (num_full_x * normal_x_size + left_over_x_size); total += left_over_k_size * (left_over_x_size + num_full_x * normal_x_size); + + total *= multis; + return total; } @@ -114,7 +117,7 @@ void NEGEMMInterleavedPrepareBWrapperKernelTemplate::configure(cons _block_sizes = 
calculate_block_sizes(ci, params.M, params.N, params.K); - auto_init_if_empty(*transformed_b->info(), b->info()->clone()->set_tensor_shape(TensorShape{ get_B_pretransposed_array_size(_Nsize, _Ksize, _block_sizes) })); + auto_init_if_empty(*transformed_b->info(), b->info()->clone()->set_tensor_shape(TensorShape{ get_B_pretransposed_array_size(_Nsize, _Ksize, _block_sizes, multis) })); Window window; window.set(Window::DimX, Window::Dimension(0, ceil_to_multiple(_Nsize, _block_sizes.x_block), _block_sizes.x_block)); -- cgit v1.2.1