aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/runtime/NEON/functions/NERNNLayer.h6
-rw-r--r--src/runtime/NEON/functions/NERNNLayer.cpp39
-rw-r--r--tests/datasets/LSTMLayerDataset.h9
-rw-r--r--tests/datasets/RNNLayerDataset.h2
4 files changed, 35 insertions, 21 deletions
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index f1398eb3cc..bdba42d6ba 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/core/Types.h"
@@ -79,6 +80,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
MemoryGroup _memory_group;
@@ -86,11 +88,11 @@ private:
NEArithmeticAdditionKernel _add_kernel;
NEActivationLayerKernel _activation_kernel;
NEFullyConnectedLayer _fully_connected_kernel;
+ NECopyKernel _copy_kernel;
Tensor _fully_connected_out;
Tensor _gemm_output;
Tensor _add_output;
- ITensor *_hidden_state;
- ITensor *_output;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NERNNLAYER_H__ */
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index f1e57c5983..995d5eed86 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -34,8 +34,8 @@
namespace arm_compute
{
NERNNLayer::NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _hidden_state(),
- _output()
+ : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
+ _is_prepared(false)
{
}
@@ -70,23 +70,25 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
ARM_COMPUTE_ERROR_THROW_ON(NERNNLayer::validate(input->info(), weights->info(), recurrent_weights->info(), bias->info(), hidden_state->info(), output->info(), info));
- _hidden_state = hidden_state;
- _output = output;
-
const int idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
TensorShape shape = misc::shape_calculator::compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
+ _is_prepared = false;
+
// Manage intermediate buffers and configure
_fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
+ _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
+
+ // Manage intermediate buffers and configure
_memory_group.manage(&_fully_connected_out);
_fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
_memory_group.manage(&_gemm_output);
_gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
_add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
_memory_group.manage(&_add_output);
+
_add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
_fully_connected_out.allocator()->allocate();
@@ -94,30 +96,37 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
_activation_kernel.configure(&_add_output, hidden_state, info);
_add_output.allocator()->allocate();
+
+ _copy_kernel.configure(hidden_state, output);
}
void NERNNLayer::run()
{
+ prepare();
+
_memory_group.acquire();
_fully_connected_kernel.run();
+
_gemm_state_f.run();
+
NEScheduler::get().schedule(&_add_kernel, Window::DimY);
NEScheduler::get().schedule(&_activation_kernel, Window::DimY);
// copy hidden out to output
- Window output_window;
- output_window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+ NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
- Iterator hidden_state_it(_hidden_state, output_window);
- Iterator output_it(_output, output_window);
+ _memory_group.release();
+}
- execute_window_loop(output_window, [&](const Coordinates & id)
+void NERNNLayer::prepare()
+{
+ if(!_is_prepared)
{
- memcpy(output_it.ptr(), hidden_state_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size());
- },
- hidden_state_it, output_it);
+ _fully_connected_kernel.prepare();
+ _gemm_state_f.prepare();
- _memory_group.release();
+ _is_prepared = true;
+ }
}
} // namespace arm_compute
diff --git a/tests/datasets/LSTMLayerDataset.h b/tests/datasets/LSTMLayerDataset.h
index a976caa0ba..c21f3208ce 100644
--- a/tests/datasets/LSTMLayerDataset.h
+++ b/tests/datasets/LSTMLayerDataset.h
@@ -160,9 +160,12 @@ class SmallLSTMLayerDataset final : public LSTMLayerDataset
public:
SmallLSTMLayerDataset()
{
- add_config(TensorShape(8U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U), TensorShape(16U), TensorShape(64U), ActivationLayerInfo(), 0.05f, 0.93f);
- add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(64U, 2U), ActivationLayerInfo(), 0.05f, 0.93f);
- add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(48U, 2U), ActivationLayerInfo(), 0.05f, 0.93f);
+ add_config(TensorShape(8U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U), TensorShape(16U), TensorShape(64U),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), 0.05f, 0.93f);
+ add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(64U, 2U),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), 0.05f, 0.93f);
+ add_config(TensorShape(8U, 2U), TensorShape(8U, 16U), TensorShape(16U, 16U), TensorShape(16U), TensorShape(16U, 2U), TensorShape(16U, 2U), TensorShape(48U, 2U),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), 0.05f, 0.93f);
}
};
diff --git a/tests/datasets/RNNLayerDataset.h b/tests/datasets/RNNLayerDataset.h
index 40d1b934f3..5f42def676 100644
--- a/tests/datasets/RNNLayerDataset.h
+++ b/tests/datasets/RNNLayerDataset.h
@@ -131,7 +131,7 @@ class SmallRNNLayerDataset final : public RNNLayerDataset
public:
SmallRNNLayerDataset()
{
- add_config(TensorShape(128U, 16U), TensorShape(128U, 32U), TensorShape(32U, 32U), TensorShape(32U), TensorShape(32U, 16U), ActivationLayerInfo());
+ add_config(TensorShape(128U, 16U), TensorShape(128U, 32U), TensorShape(32U, 32U), TensorShape(32U), TensorShape(32U, 16U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
}
};