aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
diff options
context:
space:
mode:
authorFelix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>2023-09-27 17:46:17 +0100
committerfelixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>2023-09-28 12:08:05 +0000
commitafd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree03bc7d5a762099989b16a656fa8d397b490ed70e /arm_compute/runtime/CL/functions/CLQLSTMLayer.h
parentbdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
downloadComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang format configuration file (not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...) And the following directories - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLQLSTMLayer.h')
-rw-r--r--arm_compute/runtime/CL/functions/CLQLSTMLayer.h240
1 file changed, 136 insertions, 104 deletions
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index 1b0b759d74..3e76da086f 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
@@ -32,7 +32,6 @@
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
-
#include "arm_compute/runtime/common/LSTMParams.h"
namespace arm_compute
@@ -127,12 +126,21 @@ public:
* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
*/
- void configure(const ICLTensor *input,
- const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
- const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
- const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
- ICLTensor *cell_state_in, ICLTensor *output_state_in,
- ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
+ void configure(const ICLTensor *input,
+ const ICLTensor *input_to_forget_weights,
+ const ICLTensor *input_to_cell_weights,
+ const ICLTensor *input_to_output_weights,
+ const ICLTensor *recurrent_to_forget_weights,
+ const ICLTensor *recurrent_to_cell_weights,
+ const ICLTensor *recurrent_to_output_weights,
+ const ICLTensor *forget_gate_bias,
+ const ICLTensor *cell_bias,
+ const ICLTensor *output_gate_bias,
+ ICLTensor *cell_state_in,
+ ICLTensor *output_state_in,
+ ICLTensor *cell_state_out,
+ ICLTensor *output_state_out,
+ ICLTensor *output,
const LSTMParams<ICLTensor> &lstm_params);
/** Initialize function's tensors.
@@ -177,12 +185,22 @@ public:
* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input,
- const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
- const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
- const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
- ICLTensor *cell_state_in, ICLTensor *output_state_in,
- ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *input_to_forget_weights,
+ const ICLTensor *input_to_cell_weights,
+ const ICLTensor *input_to_output_weights,
+ const ICLTensor *recurrent_to_forget_weights,
+ const ICLTensor *recurrent_to_cell_weights,
+ const ICLTensor *recurrent_to_output_weights,
+ const ICLTensor *forget_gate_bias,
+ const ICLTensor *cell_bias,
+ const ICLTensor *output_gate_bias,
+ ICLTensor *cell_state_in,
+ ICLTensor *output_state_in,
+ ICLTensor *cell_state_out,
+ ICLTensor *output_state_out,
+ ICLTensor *output,
const LSTMParams<ICLTensor> &lstm_params);
/** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer
@@ -227,12 +245,21 @@ public:
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
* @return a status
*/
- static Status validate(const ITensorInfo *input,
- const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
- const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
- const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
- const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
- const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_forget_weights,
+ const ITensorInfo *input_to_cell_weights,
+ const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_forget_weights,
+ const ITensorInfo *recurrent_to_cell_weights,
+ const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *forget_gate_bias,
+ const ITensorInfo *cell_bias,
+ const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in,
+ const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out,
+ const ITensorInfo *output_state_out,
+ const ITensorInfo *output,
const LSTMParams<ITensorInfo> &lstm_params);
// Inherited methods overridden:
@@ -266,10 +293,18 @@ private:
* @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor.
*
*/
- void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
- const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res,
- CLTensor *outstage_res, float gemmlowp_scale,
- const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
+ void configure_mm(const CLCompileContext &compile_context,
+ CLGEMMLowpMatrixMultiplyCore &mm,
+ CLGEMMLowpOutputStage &outstage,
+ GEMMLowpOutputStageInfo &gemmlowp_info,
+ const ICLTensor *mm_input,
+ const ICLTensor *mm_weights,
+ const ICLTensor *bias,
+ CLTensor *mm_res,
+ CLTensor *outstage_res,
+ float gemmlowp_scale,
+ const TensorInfo &mm_res_info,
+ const TensorInfo &outstage_tensor_info);
MemoryGroup _memory_group{};
@@ -278,8 +313,8 @@ private:
{
static constexpr uint32_t max_dimension_supported = 2;
- ICLTensor *_src{ nullptr };
- ICLTensor *_dst{ nullptr };
+ ICLTensor *_src{nullptr};
+ ICLTensor *_dst{nullptr};
size_t _row_size{};
Window _window{};
@@ -368,7 +403,7 @@ private:
CLArithmeticAddition _accumulate_projection{};
CLActivationLayer _projection_clip{};
std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
- CLCopy _copy_output;
+ CLCopy _copy_output;
TensorCopyKernel _projection_bias_copy{};
TensorCopyKernel _projection_output_to_accumulate_copy{};
@@ -376,21 +411,18 @@ private:
TensorCopyKernel _hidden_to_output_copy{};
// Tensor pointers
- const ICLTensor *_input_to_input_weights
- {
- nullptr
- };
- const ICLTensor *_recurrent_to_input_weights{ nullptr };
- const ICLTensor *_projection_bias{ nullptr };
- const ICLTensor *_input_to_forget_weights{ nullptr };
- const ICLTensor *_input_to_cell_weights{ nullptr };
- const ICLTensor *_input_to_output_weights{ nullptr };
- const ICLTensor *_recurrent_to_forget_weights{ nullptr };
- const ICLTensor *_recurrent_to_cell_weights{ nullptr };
- const ICLTensor *_recurrent_to_output_weights{ nullptr };
- const ICLTensor *_projection_weights{ nullptr };
- std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{ {} };
- std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{ {} };
+ const ICLTensor *_input_to_input_weights{nullptr};
+ const ICLTensor *_recurrent_to_input_weights{nullptr};
+ const ICLTensor *_projection_bias{nullptr};
+ const ICLTensor *_input_to_forget_weights{nullptr};
+ const ICLTensor *_input_to_cell_weights{nullptr};
+ const ICLTensor *_input_to_output_weights{nullptr};
+ const ICLTensor *_recurrent_to_forget_weights{nullptr};
+ const ICLTensor *_recurrent_to_cell_weights{nullptr};
+ const ICLTensor *_recurrent_to_output_weights{nullptr};
+ const ICLTensor *_projection_weights{nullptr};
+ std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{{}};
+ std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{{}};
using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
inline LayerNormIndexType getGateIndex(LayerNormGate g)
@@ -423,78 +455,78 @@ private:
return *_layer_norms[getGateIndex(g)];
}
- inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);
+ inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);
inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
- CLTensor _input_to_forget_weights_transposed{ nullptr };
- CLTensor _input_to_cell_weights_transposed{ nullptr };
- CLTensor _input_to_output_weights_transposed{ nullptr };
- CLTensor _input_to_input_weights_transposed{ nullptr };
- CLTensor _recurrent_to_forget_weights_transposed{ nullptr };
- CLTensor _recurrent_to_cell_weights_transposed{ nullptr };
- CLTensor _recurrent_to_output_weights_transposed{ nullptr };
- CLTensor _recurrent_to_input_weights_transposed{ nullptr };
- CLTensor _projection_weights_transposed{ nullptr };
- CLTensor _input_to_input_eff_bias{ nullptr };
- CLTensor _recurrent_to_input_eff_bias{ nullptr };
- CLTensor _input_to_forget_eff_bias{ nullptr };
- CLTensor _recurrent_to_forget_eff_bias{ nullptr };
- CLTensor _input_to_cell_eff_bias{ nullptr };
- CLTensor _recurrent_to_cell_eff_bias{ nullptr };
- CLTensor _input_to_output_eff_bias{ nullptr };
- CLTensor _recurrent_to_output_eff_bias{ nullptr };
- CLTensor _projection_reduction_res{ nullptr };
- CLTensor _projection_eff_bias{ nullptr };
- CLTensor _mm_input_to_forget_res{ nullptr };
- CLTensor _mm_recurrent_to_forget_res{ nullptr };
- CLTensor _mul_cell_to_forget_res{ nullptr };
- CLTensor _input_to_forget_outstage_res{ nullptr };
- CLTensor _cell_to_forget_outstage_res{ nullptr };
- CLTensor _recurrent_to_forget_outstage_res{ nullptr };
- CLTensor _forget_gate{ nullptr };
- CLTensor _mm_input_to_cell_res{ nullptr };
- CLTensor _input_to_cell_outstage_res{ nullptr };
- CLTensor _mm_recurrent_to_cell_res{ nullptr };
- CLTensor _recurrent_to_cell_outstage_res{ nullptr };
- CLTensor _cell_gate{ nullptr };
- CLTensor _mul_input_cell_res{ nullptr };
- CLTensor _mm_input_to_input_res{ nullptr };
- CLTensor _input_to_input_outstage_res{ nullptr };
- CLTensor _mm_recurrent_to_input_res{ nullptr };
- CLTensor _mul_cell_to_input_res{ nullptr };
- CLTensor _cell_to_input_outstage_res{ nullptr };
- CLTensor _recurrent_to_input_outstage_res{ nullptr };
- CLTensor _input_gate{ nullptr };
- CLTensor _mm_input_to_output_res{ nullptr };
- CLTensor _input_to_output_outstage_res{ nullptr };
- CLTensor _mm_recurrent_to_output_res{ nullptr };
- CLTensor _mul_cell_to_output_res{ nullptr };
- CLTensor _cell_to_output_outstage_res{ nullptr };
- CLTensor _recurrent_to_output_outstage_res{ nullptr };
- CLTensor _output_gate{ nullptr };
- CLTensor _hidden_mul_res{ nullptr };
- CLTensor _hidden_gate{ nullptr };
- CLTensor _mm_projection_res{ nullptr };
- CLTensor _projection_outstage_res{ nullptr };
- CLTensor _projection_out_res{ nullptr };
- CLTensor _projection_accumulate_res{ nullptr };
- CLTensor _ones{ nullptr };
- std::array<CLTensor, _layer_norm_count> _layer_norm_output{ {} };
+ CLTensor _input_to_forget_weights_transposed{nullptr};
+ CLTensor _input_to_cell_weights_transposed{nullptr};
+ CLTensor _input_to_output_weights_transposed{nullptr};
+ CLTensor _input_to_input_weights_transposed{nullptr};
+ CLTensor _recurrent_to_forget_weights_transposed{nullptr};
+ CLTensor _recurrent_to_cell_weights_transposed{nullptr};
+ CLTensor _recurrent_to_output_weights_transposed{nullptr};
+ CLTensor _recurrent_to_input_weights_transposed{nullptr};
+ CLTensor _projection_weights_transposed{nullptr};
+ CLTensor _input_to_input_eff_bias{nullptr};
+ CLTensor _recurrent_to_input_eff_bias{nullptr};
+ CLTensor _input_to_forget_eff_bias{nullptr};
+ CLTensor _recurrent_to_forget_eff_bias{nullptr};
+ CLTensor _input_to_cell_eff_bias{nullptr};
+ CLTensor _recurrent_to_cell_eff_bias{nullptr};
+ CLTensor _input_to_output_eff_bias{nullptr};
+ CLTensor _recurrent_to_output_eff_bias{nullptr};
+ CLTensor _projection_reduction_res{nullptr};
+ CLTensor _projection_eff_bias{nullptr};
+ CLTensor _mm_input_to_forget_res{nullptr};
+ CLTensor _mm_recurrent_to_forget_res{nullptr};
+ CLTensor _mul_cell_to_forget_res{nullptr};
+ CLTensor _input_to_forget_outstage_res{nullptr};
+ CLTensor _cell_to_forget_outstage_res{nullptr};
+ CLTensor _recurrent_to_forget_outstage_res{nullptr};
+ CLTensor _forget_gate{nullptr};
+ CLTensor _mm_input_to_cell_res{nullptr};
+ CLTensor _input_to_cell_outstage_res{nullptr};
+ CLTensor _mm_recurrent_to_cell_res{nullptr};
+ CLTensor _recurrent_to_cell_outstage_res{nullptr};
+ CLTensor _cell_gate{nullptr};
+ CLTensor _mul_input_cell_res{nullptr};
+ CLTensor _mm_input_to_input_res{nullptr};
+ CLTensor _input_to_input_outstage_res{nullptr};
+ CLTensor _mm_recurrent_to_input_res{nullptr};
+ CLTensor _mul_cell_to_input_res{nullptr};
+ CLTensor _cell_to_input_outstage_res{nullptr};
+ CLTensor _recurrent_to_input_outstage_res{nullptr};
+ CLTensor _input_gate{nullptr};
+ CLTensor _mm_input_to_output_res{nullptr};
+ CLTensor _input_to_output_outstage_res{nullptr};
+ CLTensor _mm_recurrent_to_output_res{nullptr};
+ CLTensor _mul_cell_to_output_res{nullptr};
+ CLTensor _cell_to_output_outstage_res{nullptr};
+ CLTensor _recurrent_to_output_outstage_res{nullptr};
+ CLTensor _output_gate{nullptr};
+ CLTensor _hidden_mul_res{nullptr};
+ CLTensor _hidden_gate{nullptr};
+ CLTensor _mm_projection_res{nullptr};
+ CLTensor _projection_outstage_res{nullptr};
+ CLTensor _projection_out_res{nullptr};
+ CLTensor _projection_accumulate_res{nullptr};
+ CLTensor _ones{nullptr};
+ std::array<CLTensor, _layer_norm_count> _layer_norm_output{{}};
inline CLTensor &get_layer_norm_output(LayerNormGate g)
{
return _layer_norm_output[getGateIndex(g)];
}
- bool _is_prepared{ false };
- bool _has_cifg{ false };
- bool _has_cell_clipping{ false };
- bool _has_projection{ false };
- bool _has_projection_clipping{ false };
- bool _has_peephole{ false };
- bool _has_layer_norm{ false };
- bool _projection_tensor_copy_required{ false };
+ bool _is_prepared{false};
+ bool _has_cifg{false};
+ bool _has_cell_clipping{false};
+ bool _has_projection{false};
+ bool _has_projection_clipping{false};
+ bool _has_peephole{false};
+ bool _has_layer_norm{false};
+ bool _projection_tensor_copy_required{false};
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLQLSTMLAYER_H */