From 3ada2b7a29e1ab2058ab7dc701cacff548d2aae9 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 23 Aug 2018 15:54:36 +0100
Subject: COMPMID-1534: Fix LSTM/RNN Layers for NEON and FP16

Switches the default activation layer in the respective datasets from
LOGISTIC to RELU.

Change-Id: I09f1ad09922ccdd6e1dc33c28a594f7ffbfe40f4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145436
Reviewed-by: Anthony Barbier
Tested-by: Jenkins
---
 arm_compute/runtime/NEON/functions/NERNNLayer.h |  6 ++--
 src/runtime/NEON/functions/NERNNLayer.cpp       | 39 +++++++++++++++----------
 tests/datasets/LSTMLayerDataset.h               |  9 ++++--
 tests/datasets/RNNLayerDataset.h                |  2 +-
 4 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index f1398eb3cc..bdba42d6ba 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
 #include "arm_compute/runtime/NEON/INESimpleFunction.h"
 
 #include "arm_compute/core/Types.h"
@@ -79,6 +80,7 @@ public:
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     MemoryGroup                _memory_group;
@@ -86,11 +88,11 @@ private:
     NEArithmeticAdditionKernel _add_kernel;
     NEActivationLayerKernel    _activation_kernel;
     NEFullyConnectedLayer      _fully_connected_kernel;
+    NECopyKernel               _copy_kernel;
     Tensor                     _fully_connected_out;
     Tensor                     _gemm_output;
    Tensor                     _add_output;
-    ITensor                   *_hidden_state;
-    ITensor                   *_output;
+    bool                       _is_prepared;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NERNNLAYER_H__ */
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index f1e57c5983..995d5eed86 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -34,8 +34,8 @@
 namespace arm_compute
 {
 NERNNLayer::NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _hidden_state(),
-      _output()
+    : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
+      _is_prepared(false)
 {
 }
 
@@ -70,23 +70,25 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
     ARM_COMPUTE_ERROR_THROW_ON(NERNNLayer::validate(input->info(), weights->info(), recurrent_weights->info(), bias->info(), hidden_state->info(), output->info(), info));
 
-    _hidden_state = hidden_state;
-    _output       = output;
-
     const int   idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
     TensorShape shape      = misc::shape_calculator::compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
 
+    _is_prepared = false;
+
     // Manage intermediate buffers and configure
     _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
+    _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
+
+    // Manage intermediate buffers and configure
     _memory_group.manage(&_fully_connected_out);
     _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
 
-    _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
     _memory_group.manage(&_gemm_output);
     _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
 
     _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
     _memory_group.manage(&_add_output);
+
     _add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
 
     _fully_connected_out.allocator()->allocate();
@@ -94,30 +96,37 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
 
     _activation_kernel.configure(&_add_output, hidden_state, info);
     _add_output.allocator()->allocate();
+
+    _copy_kernel.configure(hidden_state, output);
 }
 
 void NERNNLayer::run()
 {
+    prepare();
+
     _memory_group.acquire();
 
     _fully_connected_kernel.run();
+
     _gemm_state_f.run();
+
     NEScheduler::get().schedule(&_add_kernel, Window::DimY);
     NEScheduler::get().schedule(&_activation_kernel, Window::DimY);
 
     // copy hidden out to output
-    Window output_window;
-    output_window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+    NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
 
-    Iterator hidden_state_it(_hidden_state, output_window);
-    Iterator output_it(_output, output_window);
+    _memory_group.release();
+}
 
-    execute_window_loop(output_window, [&](const Coordinates & id)
+void NERNNLayer::prepare()
+{
+    if(!_is_prepared)
     {
-        memcpy(output_it.ptr(), hidden_state_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size());
-    },
-    hidden_state_it, output_it);
+        _fully_connected_kernel.prepare();
+        _gemm_state_f.prepare();
 
-    _memory_group.release();
+        _is_prepared = true;
+    }
 }
 } // namespace arm_compute
diff --git a/tests/datasets/LSTMLayerDataset.h b/tests/datasets/LSTMLayerDataset.h
index a976caa0ba..c21f3208ce 100644
--- a/tests/datasets/LSTMLayerDataset.h
+++ b/tests/datasets/LSTMLayerDataset.h
@@ -160,9 +160,12 @@ class SmallLSTMLayerDataset final : public LSTMLayerDataset
 public:
     SmallLSTMLayerDataset()
     {
-        add_config(TensorShape(8U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U), TensorShape(16U), TensorShape(64U), ActivationLayerInfo(), 0.05f, 0.93f);
-        add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(64U, 2U), ActivationLayerInfo(), 0.05f, 0.93f);
-        add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(48U, 2U), ActivationLayerInfo(), 0.05f, 0.93f);
+        add_config(TensorShape(8U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U), TensorShape(16U), TensorShape(64U),
+                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), 0.05f, 0.93f);
+        add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(64U, 2U),
+                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), 0.05f, 0.93f);
+        add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(48U, 2U),
+                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), 0.05f, 0.93f);
     }
 };
 
diff --git a/tests/datasets/RNNLayerDataset.h b/tests/datasets/RNNLayerDataset.h
index 40d1b934f3..5f42def676 100644
--- a/tests/datasets/RNNLayerDataset.h
+++ b/tests/datasets/RNNLayerDataset.h
@@ -131,7 +131,7 @@ class SmallRNNLayerDataset final : public RNNLayerDataset
 public:
     SmallRNNLayerDataset()
     {
-        add_config(TensorShape(128U, 16U), TensorShape(128U, 32U), TensorShape(32U, 32U), TensorShape(32U), TensorShape(32U, 16U), ActivationLayerInfo());
+        add_config(TensorShape(128U, 16U), TensorShape(128U, 32U), TensorShape(32U, 32U), TensorShape(32U), TensorShape(32U, 16U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
     }
 };
 
-- 
cgit v1.2.1
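
For reference, below is a minimal sketch (not part of the patch) of how the fixed NERNNLayer path is driven with the RELU activation that the datasets above now request. Tensor shapes are copied from the SmallRNNLayerDataset entry; the F32 data type, the main() wrapper, and the omission of tensor filling and validation are assumptions made purely for illustration.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NERNNLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Shapes follow the SmallRNNLayerDataset configuration:
        // input [128, 16], weights [128, 32], recurrent weights [32, 32], bias [32], hidden state/output [32, 16].
        Tensor input, weights, recurrent_weights, bias, hidden_state, output;
        input.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));
        recurrent_weights.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
        bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        hidden_state.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));

        // RELU activation, as the updated datasets now use instead of a default-constructed ActivationLayerInfo().
        ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

        NERNNLayer rnn;
        rnn.configure(&input, &weights, &recurrent_weights, &bias, &hidden_state, &output, act_info);

        // Allocate backing memory; real code would fill the input and weight tensors here.
        input.allocator()->allocate();
        weights.allocator()->allocate();
        recurrent_weights.allocator()->allocate();
        bias.allocator()->allocate();
        hidden_state.allocator()->allocate();
        output.allocator()->allocate();

        // run() now calls prepare() internally and copies hidden_state into output via the new NECopyKernel.
        rnn.run();

        return 0;
    }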