aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NERNNLayer.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-08-23 15:54:36 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit3ada2b7a29e1ab2058ab7dc701cacff548d2aae9 (patch)
tree734a55577ca01011de6d7f1587afbf65b0ab1f38 /src/runtime/NEON/functions/NERNNLayer.cpp
parentf2cde9b29deee6423ea6fe9a1a9afc9ef61d2663 (diff)
downloadComputeLibrary-3ada2b7a29e1ab2058ab7dc701cacff548d2aae9.tar.gz
COMPMID-1534: Fix LSTM/RNN Layers for NEON and FP16
Switches the default activation layer in the respective datasets from LOGISTIC to RELU. Change-Id: I09f1ad09922ccdd6e1dc33c28a594f7ffbfe40f4 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145436 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NERNNLayer.cpp')
-rw-r--r--src/runtime/NEON/functions/NERNNLayer.cpp39
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index f1e57c5983..995d5eed86 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -34,8 +34,8 @@
namespace arm_compute
{
NERNNLayer::NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _hidden_state(),
- _output()
+ : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
+ _is_prepared(false)
{
}
@@ -70,23 +70,25 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
ARM_COMPUTE_ERROR_THROW_ON(NERNNLayer::validate(input->info(), weights->info(), recurrent_weights->info(), bias->info(), hidden_state->info(), output->info(), info));
- _hidden_state = hidden_state;
- _output = output;
-
const int idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
TensorShape shape = misc::shape_calculator::compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
+ _is_prepared = false;
+
// Manage intermediate buffers and configure
_fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
+ _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
+
+ // Manage intermediate buffers and configure
_memory_group.manage(&_fully_connected_out);
_fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
_memory_group.manage(&_gemm_output);
_gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
_add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
_memory_group.manage(&_add_output);
+
_add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
_fully_connected_out.allocator()->allocate();
@@ -94,30 +96,37 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
_activation_kernel.configure(&_add_output, hidden_state, info);
_add_output.allocator()->allocate();
+
+ _copy_kernel.configure(hidden_state, output);
}
void NERNNLayer::run()
{
+ prepare();
+
_memory_group.acquire();
_fully_connected_kernel.run();
+
_gemm_state_f.run();
+
NEScheduler::get().schedule(&_add_kernel, Window::DimY);
NEScheduler::get().schedule(&_activation_kernel, Window::DimY);
// copy hidden out to output
- Window output_window;
- output_window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+ NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
- Iterator hidden_state_it(_hidden_state, output_window);
- Iterator output_it(_output, output_window);
+ _memory_group.release();
+}
- execute_window_loop(output_window, [&](const Coordinates & id)
+void NERNNLayer::prepare()
+{
+ if(!_is_prepared)
{
- memcpy(output_it.ptr(), hidden_state_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size());
- },
- hidden_state_it, output_it);
+ _fully_connected_kernel.prepare();
+ _gemm_state_f.prepare();
- _memory_group.release();
+ _is_prepared = true;
+ }
}
} // namespace arm_compute