author     Sang-Hoon Park <sang-hoon.park@arm.com>    2020-05-06 21:01:19 +0100
committer  Sang-Hoon Park <sang-hoon.park@arm.com>    2020-05-11 11:37:41 +0000
commit     d5c020a20514cad8c78f0ab2cc46a03607854a49 (patch)
tree       e4fdb01d446022267aaaced2021ad61399b6f34b /arm_compute
parent     6f8b17dedb7b53b550e6210fd1c78c3a3e086271 (diff)
download   ComputeLibrary-d5c020a20514cad8c78f0ab2cc46a03607854a49.tar.gz
COMPMID-3239: Fix projection and peephole in NEQLSTMLayer
- Peephole and projection have been fixed so that they work correctly.
- A small internal kernel that copies data between tensors has been added to cover the case where num_units and output_size differ (see the sketch after the diffstat below).

The changes below are strictly outside this patch's scope, but they help this patch work (directly or indirectly) or make NEQLSTM more complete:

- Handling of layer normalization is added to InfoHelpers.
- The QSYMM8 data type is added to the helper function that prints out tensors.
- The NE/CLLSTMLayer::validate() logic is modified to use the correct value for shape validation.

Change-Id: I40b4e71dfdbe8432caa2fe4a9af60a725362cc33
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3157
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/utils/misc/InfoHelpers.h          | 17
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQLSTMLayer.h  | 47
2 files changed, 62 insertions, 2 deletions
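The second bullet of the commit message refers to the mismatch between num_units (the width of the gate and cell results) and output_size (the width of the projected output state, dimension 0 of the output-state tensor, as the new _out_state_output_size_dimension_idx constant indicates). A minimal sketch of that condition follows; the sizes, data types and the flag name are illustrative assumptions, not code from this patch.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative sizes only: with projection, the output state can be
        // narrower than the cell state.
        const unsigned int num_units   = 32; // width of the gate/cell results
        const unsigned int output_size = 16; // width of the projected output state
        const unsigned int batch_size  = 2;

        const TensorInfo gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16);
        const TensorInfo out_state_info(TensorShape(output_size, batch_size), 1, DataType::QASYMM8_SIGNED);

        // When the widths differ, intermediate results have to be staged in a
        // separate tensor and copied over, which is what the new internal
        // copy kernel in NEQLSTMLayer is for.
        const bool projection_tensor_copy_required =
            gate_info.dimension(0) != out_state_info.dimension(0);

        return projection_tensor_copy_required ? 0 : 1;
    }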
diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h
index 8cf701c124..6ecda7a0dd 100644
--- a/arm_compute/core/utils/misc/InfoHelpers.h
+++ b/arm_compute/core/utils/misc/InfoHelpers.h
@@ -90,6 +90,23 @@ inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params,
lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(),
cell_to_input_weights_info, lstm_params.input_gate_bias()->info());
}
+ if(lstm_params.use_layer_norm())
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(),
+ lstm_params.output_layer_norm_weights(),
+ lstm_params.cell_layer_norm_weights());
+ if(!lstm_params.has_cifg_opt())
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights());
+ }
+
+ const ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info();
+ const ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info();
+ const ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info();
+ const ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info();
+
+ lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info);
+ }
}
} // namespace info_helpers
} // namespace utils
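The branch added above only forwards layer-normalization weight infos that the caller has already registered on its LSTMParams<ITensor>. Below is a minimal caller-side sketch; the argument order of set_layer_normalization_params() (input, forget, cell, output) is assumed from the call added in this hunk, and the rest of the mandatory LSTMParams setup (gate weights, biases, projection, tensor allocation, ...) is deliberately omitted.

    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/common/LSTMParams.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical layer-normalization weight tensors; shape and
        // quantization setup are omitted in this sketch.
        Tensor input_ln_w{}, forget_ln_w{}, cell_ln_w{}, output_ln_w{};

        LSTMParams<ITensor> lstm_params;
        // Assumed order, mirroring the call above: input, forget, cell, output.
        lstm_params.set_layer_normalization_params(&input_ln_w, &forget_ln_w, &cell_ln_w, &output_ln_w);

        // With a fully populated LSTMParams, build_lstm_params_tensor_info()
        // now propagates the corresponding ITensorInfo objects because
        // use_layer_norm() reports true.
        return lstm_params.use_layer_norm() ? 0 : 1;
    }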
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index 0553e4f266..9eb0654cfe 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -178,7 +178,8 @@ private:
Output,
Count
};
- static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count);
+ static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count);
+ static constexpr uint32_t _out_state_output_size_dimension_idx = 0;
/** Internal method to configure matrix multiplication plus output stage of each gate.
*
@@ -201,6 +202,35 @@ private:
MemoryGroup _memory_group{};
+ /** A small internal kernel to copy data between two tensors */
+ class TensorCopyKernel
+ {
+ static constexpr uint32_t max_dimension_supported = 2;
+
+ ITensor *_src{ nullptr };
+ ITensor *_dst{ nullptr };
+ size_t _row_size{};
+ Window _window{};
+
+ public:
+ /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel
+ *
+ * @param[in] src Source tensor info.
+ * @param[in] dst Destination tensor info.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo &src, const ITensorInfo &dst);
+ /** Set the input and output tensors.
+ *
+ * @param[in] src Source tensor
+ * @param[out] dst Destination tensor
+ */
+ void configure(ITensor &src, ITensor &dst);
+ /** Run the kernel */
+ void run();
+ };
+
// Functions used
NETranspose _transpose_input_to_forget_weights{};
NETranspose _transpose_input_to_cell_weights{};
@@ -245,7 +275,7 @@ private:
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{};
NEGEMMLowpOutputStage _cell_to_input_outstage{};
NEArithmeticAdditionKernel _accumulate_cell_input{};
- NEActivationLayer _input_gate_tanh{};
+ NEActivationLayer _input_gate_sigmoid{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{};
NEArithmeticAdditionKernel _add_forget_cell{};
@@ -256,6 +286,7 @@ private:
NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
NEArithmeticAdditionKernel _accumulate_input_recurrent_output{};
NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{};
+ NEGEMMLowpOutputStage _cell_to_output_outstage{};
NEArithmeticAdditionKernel _accumulate_cell_to_output{};
NEActivationLayer _output_gate_sigmoid{};
NEActivationLayer _hidden_tanh{};
@@ -265,6 +296,12 @@ private:
NEGEMMLowpOutputStage _projection_outstage{};
NEArithmeticAdditionKernel _accumulate_projection{};
NEActivationLayer _projection_clip{};
+
+ TensorCopyKernel _projection_bias_copy{};
+ TensorCopyKernel _projection_output_to_accumulate_copy{};
+ TensorCopyKernel _projection_accumulate_to_output_copy{};
+ TensorCopyKernel _hidden_to_output_copy{};
+
std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
// Tensor pointers
@@ -375,11 +412,16 @@ private:
Tensor _input_to_output_outstage_res{ nullptr };
Tensor _mm_recurrent_to_output_res{ nullptr };
Tensor _mul_cell_to_output_res{ nullptr };
+ Tensor _cell_to_output_outstage_res{ nullptr };
Tensor _recurrent_to_output_outstage_res{ nullptr };
Tensor _output_gate{ nullptr };
Tensor _hidden_mul_res{ nullptr };
+ Tensor _hidden_gate{ nullptr };
Tensor _mm_projection_res{ nullptr };
Tensor _projection_outstage_res{ nullptr };
+ Tensor _projection_out_res{ nullptr };
+ Tensor _projection_eff_bias_adjusted{ nullptr };
+ Tensor _projection_accumulate_res{ nullptr };
Tensor _ones{ nullptr };
std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} };
@@ -395,6 +437,7 @@ private:
bool _has_projection_clipping{ false };
bool _has_peephole{ false };
bool _has_layer_norm{ false };
+ bool _projection_tensor_copy_required{ false };
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEQLSTMLAYER_H */
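The header above only declares the TensorCopyKernel interface (validate/configure/run). The sketch below shows one way such a row-wise copy can be expressed with the library's existing Window/Iterator utilities; the standalone function, its name and the exact window setup are assumptions for illustration, and the real kernel body lives in the corresponding NEQLSTMLayer source file and may differ.

    #include <algorithm>
    #include <cstring>

    #include "arm_compute/core/Helpers.h"
    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/core/Window.h"

    using namespace arm_compute;

    // Copies min(src, dst) row widths' worth of bytes for every row of the
    // destination. Both tensors must already be allocated and are assumed to
    // share the same data type.
    void example_tensor_copy(ITensor &src, ITensor &dst)
    {
        const size_t row_size = std::min(src.info()->dimension(0), dst.info()->dimension(0)) *
                                src.info()->element_size();

        // Collapse the X dimension so that each window step addresses the
        // start of one row; iterate the remaining dimensions of dst.
        Window window;
        window.use_tensor_dimensions(dst.info()->tensor_shape(), /* first_dimension = */ Window::DimY);

        Iterator src_it{ &src, window };
        Iterator dst_it{ &dst, window };

        execute_window_loop(window, [&](const Coordinates &)
        {
            std::memcpy(dst_it.ptr(), src_it.ptr(), row_size);
        },
        src_it, dst_it);
    }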