author    Michalis Spyrou <michalis.spyrou@arm.com>  2020-10-21 00:04:14 +0100
committer Michalis Spyrou <michalis.spyrou@arm.com>  2020-11-03 15:10:47 +0000
commit    ebcebf1dee7f8314976b1e0cabd62b4cf893d765 (patch)
tree      95d3e691a0e88a3e213a1d30446a9224497f2055 /src/runtime/NEON/functions/NEQLSTMLayer.cpp
parent    da4b1b2055d96aaf73704eb9b0b82d74dc2d699c (diff)
COMPMID-3638: Move NEON kernels
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
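
The pattern this patch applies throughout: kernel classes move out of the public arm_compute/ headers into src/core/NEON/kernels/, and the functions that use them hold the now forward-declared kernel types through std::unique_ptr, defaulting their destructors in the .cpp file where the kernel definitions are complete (hence the new ~NEQLSTMLayer() and ~TensorCopyKernel() in the diff below). A minimal self-contained sketch of that idiom, assuming hypothetical MyKernel/MyFunction stand-ins rather than real Compute Library types:

    #include <memory>

    // In the library the kernel class is only forward-declared in the
    // function's header; its definition lives in a private src/ header.
    class MyKernel
    {
    public:
        void configure(int split_dimension) { _dim = split_dimension; }
    private:
        int _dim{ 0 };
    };

    class MyFunction
    {
    public:
        MyFunction() = default;
        // In the library this destructor is declared in the header and
        // defaulted in the .cpp, where MyKernel is a complete type.
        ~MyFunction() = default;

        void configure()
        {
            // The library spells this arm_compute::support::cpp14::make_unique
            // to stay within C++14.
            _kernel = std::make_unique<MyKernel>();
            _kernel->configure(/* split_dimension = */ 1);
        }

        // Raw pointer handed to the scheduler, as the run() hunks below do.
        MyKernel *kernel() { return _kernel.get(); }

    private:
        std::unique_ptr<MyKernel> _kernel; // was a by-value kernel member
    };

    int main()
    {
        MyFunction f;
        f.configure();
        return 0;
    }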
Diffstat (limited to 'src/runtime/NEON/functions/NEQLSTMLayer.cpp')
-rw-r--r--  src/runtime/NEON/functions/NEQLSTMLayer.cpp  108
1 file changed, 83 insertions(+), 25 deletions(-)
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index e41962451c..1013730235 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -30,7 +30,16 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
@@ -47,6 +56,31 @@ Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo *mm
}
} // namespace
+Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
+{
+ // Output quantization scale will be different, but ignored here
+ // since it will be configured at configure() stage.
+ const TensorInfo out
+ {
+ in
+ };
+ return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
+}
+
+void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
+{
+ ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
+
+ Tensor &out = get_layer_norm_output(g);
+ _memory_group.manage(&out);
+ out.allocator()->init(*(in->info()));
+
+ get_layer_norm(g) = arm_compute::support::cpp14::make_unique<NEQLSTMLayerNormalizationKernel>();
+ get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
+}
+
+NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default;
+
Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
{
ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
@@ -77,7 +111,21 @@ void NEQLSTMLayer::TensorCopyKernel::run()
input_iter, output_iter);
}
+NEQLSTMLayer::~NEQLSTMLayer() = default;
+
NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
+ _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
+ _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
+ _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
+ _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
+ _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
+ _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
+ _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
+ _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
+ _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
+ _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
+ _layer_norm_output()
{
_memory_group = MemoryGroup(std::move(memory_manager));
}
@@ -178,18 +226,29 @@ void NEQLSTMLayer::configure(const ITensor *input,
_input_to_input_weights = lstm_params.input_to_input_weights();
_recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
- _input_to_input_reduction.configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_input_reduction.configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_input_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_input_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
}
- _input_to_forget_reduction.configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_forget_reduction.configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_cell_reduction.configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_cell_reduction.configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_output_reduction.configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_output_reduction.configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+
+ _input_to_forget_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_forget_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_cell_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_cell_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_output_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_output_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+
+ _input_to_forget_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
if(_has_projection)
{
- _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ _projection_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
if(_projection_bias != nullptr)
{
_projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
@@ -878,7 +937,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
}
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
return Status{};
}
@@ -906,7 +965,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Forget), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY);
}
_forget_gate_sigmoid.run();
@@ -921,7 +980,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Cell), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY);
}
_cell_gate_tanh.run();
@@ -948,7 +1007,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Input), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY);
}
_input_gate_sigmoid.run();
@@ -979,7 +1038,7 @@ void NEQLSTMLayer::run()
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Output), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY);
}
_output_gate_sigmoid.run();
@@ -1021,7 +1080,7 @@ void NEQLSTMLayer::run()
}
// Copy output_state_out to output
- NEScheduler::get().schedule(&_copy_output, Window::DimY);
+ _copy_output.run();
}
void NEQLSTMLayer::prepare()
@@ -1051,8 +1110,8 @@ void NEQLSTMLayer::prepare()
{
_input_to_input_eff_bias.allocator()->allocate();
_recurrent_to_input_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_input_to_input_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_input_reduction, Window::DimY);
+ NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY);
_input_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_input_weights_transposed.allocator()->allocate();
@@ -1067,17 +1126,17 @@ void NEQLSTMLayer::prepare()
_recurrent_to_cell_eff_bias.allocator()->allocate();
_input_to_output_eff_bias.allocator()->allocate();
_recurrent_to_output_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_input_to_forget_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_forget_reduction, Window::DimY);
- NEScheduler::get().schedule(&_input_to_cell_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_cell_reduction, Window::DimY);
- NEScheduler::get().schedule(&_input_to_output_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_output_reduction, Window::DimY);
+ NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY);
if(_has_projection)
{
_projection_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
+ NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY);
if(_projection_bias != nullptr)
{
_projection_bias_add.run();
@@ -1106,5 +1165,4 @@ void NEQLSTMLayer::prepare()
_is_prepared = true;
}
}
-
} // namespace arm_compute
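
The mechanical consequence visible in run() and prepare() above: where a by-value kernel member used to be scheduled as schedule(&_kernel, Window::DimY), the heap-allocated kernel is now passed as schedule(_kernel.get(), Window::DimY), and the final output copy goes through the NECopy function's run() rather than scheduling an NECopyKernel. A small sketch of that hand-off, assuming hypothetical stand-in types (IKernel/Scheduler here are not the library's real interfaces):

    #include <memory>

    struct IKernel
    {
        virtual ~IKernel() = default;
        virtual void run() = 0;
    };

    // Stand-in for NEScheduler; the real one also takes a split-dimension
    // hint such as Window::DimY.
    struct Scheduler
    {
        void schedule(IKernel *kernel) { kernel->run(); }
    };

    struct NormKernel : IKernel
    {
        void run() override { /* layer-norm body elided */ }
    };

    int main()
    {
        Scheduler scheduler;
        std::unique_ptr<IKernel> norm = std::make_unique<NormKernel>();
        scheduler.schedule(norm.get()); // unique_ptr keeps ownership; the scheduler only borrows
        return 0;
    }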