From ac314c25f41e3b2be2ef9073377079584fc88861 Mon Sep 17 00:00:00 2001
From: Anthony Barbier
Date: Tue, 11 Sep 2018 17:49:10 +0100
Subject: COMPMID-1563: Fix name of NEGEMMInterleavedWrapper

Change-Id: I5f868091cae7bd86eeeb7216d44f32c190c5a604
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/147804
Tested-by: bsgcomp
Reviewed-by: Georgios Pinitas
---
 .../assembly/NEGEMMInterleavedWrapper.cpp | 79 ++++++++++++----------
 1 file changed, 44 insertions(+), 35 deletions(-)

(limited to 'src/runtime/NEON/functions/assembly')

diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index 69d59283ae..c87e82afb8 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -25,6 +25,7 @@
 #include "arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h"
 
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/assembly/Helpers.h"
 #include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.h"
 #include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.h"
 #include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.h"
@@ -42,7 +43,7 @@ void NEGEMMInterleavedWrapper::run()
     prepare();
 
     _memory_group.acquire();
-    NEScheduler::get().run_tagged_workloads(_workloads, "NEGEMMInterleavedWrapper");
+    NEScheduler::get().run_tagged_workloads(_workloads, _tag.c_str());
     _memory_group.release();
 }
 
@@ -151,51 +152,59 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe
     const unsigned int alignment = 128;
     _transformed_b.allocator()->init(TensorInfo{}, alignment);
     _tmp_c.allocator()->init(TensorInfo{}, alignment);
+    _tag = "NEGEMMInterleaved_";
+    _tag += get_strategy_name(input_type, use_dot);
+
     if(!_pretranspose_b)
     {
         // If B is transposed at every iteration then transformed_B can be managed:
         _memory_group.manage(&_transformed_b);
+        _block_sizes = calculate_block_sizes_from_data_type(NEScheduler::get().cpu_info(), _params.M, _params.N, _params.K, input_type, use_dot);
     }
-    switch(input_type)
+    else
     {
-        case DataType::F32:
-            _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
-            break;
+        _tag += "_preB";
+        switch(input_type)
+        {
+            case DataType::F32:
+                _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
+                break;
 #ifdef __aarch64__
-        case DataType::U8:
-        case DataType::QASYMM8:
-            if(use_dot)
-            {
-                _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
-            }
-            else
-            {
-                _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
-            }
-            break;
-        case DataType::S8:
-            if(use_dot)
-            {
-                _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
-            }
-            else
-            {
-                _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
-            }
-            break;
+            case DataType::U8:
+            case DataType::QASYMM8:
+                if(use_dot)
+                {
+                    _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
+                }
+                else
+                {
+                    _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
+                }
+                break;
+            case DataType::S8:
+                if(use_dot)
+                {
+                    _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
+                }
+                else
+                {
+                    _prepare_b = instantiate_prepareB(_b, &_transformed_b, _params);
+                }
+                break;
 #endif /* __aarch64__ */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-        case DataType::F16:
-            _prepare_b = instantiate_prepareB<__fp16>(_b, &_transformed_b, _params);
-            break;
+            case DataType::F16:
+                _prepare_b = instantiate_prepareB<__fp16>(_b, &_transformed_b, _params);
+                break;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-        default:
-            ARM_COMPUTE_ERROR("DataType not supported");
-            break;
-    }
-    ARM_COMPUTE_ERROR_ON(_prepare_b == nullptr);
+            default:
+                ARM_COMPUTE_ERROR("DataType not supported");
+                break;
+        }
+        ARM_COMPUTE_ERROR_ON(_prepare_b == nullptr);
 
-    _block_sizes = _prepare_b->block_sizes();
+        _block_sizes = _prepare_b->block_sizes();
+    }
 
     _block_walker.set(Window::DimX, Window::Dimension(0, ceil_to_multiple(_params.N, _block_sizes.x_block), _block_sizes.x_block));
     _block_walker.set(Window::DimY, Window::Dimension(0, ceil_to_multiple(_params.K, _block_sizes.k_block), _block_sizes.k_block));
-- 
cgit v1.2.1