diff options
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h | 117 |
1 files changed, 74 insertions, 43 deletions
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index 2f3b8fd336..ae951669b3 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" @@ -38,8 +39,6 @@ #include "arm_compute/runtime/NEON/functions/NESlice.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/runtime/common/LSTMParams.h" - namespace arm_compute { // Forward declarations @@ -47,10 +46,10 @@ class ITensor; /** Basic function to run @ref NELSTMLayerQuantized * - * This function calls the following NEON functions/kernels: + * This function calls the following functions/kernels: * * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 + * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 * -# @ref NETranspose Matrix transpose * -# @ref NEConcatenateLayer Tensor concatenation * -# @ref NEActivationLayer Activation functions (tanh and logistic) @@ -67,14 +66,24 @@ public: NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete; - /** Default move constructor */ - NELSTMLayerQuantized(NELSTMLayerQuantized &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete; - /** Default move assignment operator */ - NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete; + /** Default destructor */ + ~NELSTMLayerQuantized(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 | + * |:-----------|:------------|:-------|:------|:------|:------| + * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8| + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. @@ -94,11 +103,22 @@ public: * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ void configure(const ITensor *input, - const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - ITensor *cell_state_in, const ITensor *output_state_in, - ITensor *cell_state_out, ITensor *output_state_out); + const ITensor *input_to_input_weights, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_input_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *input_gate_bias, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + ITensor *cell_state_in, + const ITensor *output_state_in, + ITensor *cell_state_out, + ITensor *output_state_out); /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer * @@ -123,11 +143,22 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); + const ITensorInfo *input_to_input_weights, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_input_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *input_gate_bias, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out); // Inherited methods overridden: void run() override; @@ -137,30 +168,30 @@ private: MemoryGroup _memory_group; // Functions used - NEGEMMLowpMatrixMultiplyCore _gemmlowp; - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; - NETranspose _transpose_weights; - NEConcatenateLayer _concat_input_weights; - NEConcatenateLayer _concat_recurrent_weights; - NEConcatenateLayer _concat_weights; - NEConcatenateLayer _concat_inputs; - NEConcatenateLayer _concat_bias; - NEActivationLayer _sigmoid_forget_gate; - NEActivationLayer _sigmoid_input_gate; - NEActivationLayer _sigmoid_output_gate; - NEActivationLayer _tanh_modulation_gate; - NEActivationLayer _tanh_output_state; - NEArithmeticAddition _add1; - NEArithmeticAddition _add2; - NEPixelWiseMultiplication _mul1; - NEPixelWiseMultiplication _mul2; - NEPixelWiseMultiplication _mul3; - NESlice _slice_input_tensor; - NESlice _slice_forget_tensor; - NESlice _slice_cell_tensor; - NESlice _slice_output_tensor; - NEDequantizationLayer _dequantize; - NEQuantizationLayer _quantize; + NEGEMMLowpMatrixMultiplyCore _gemmlowp; + NEGEMMLowpOutputStage _output_stage; + NETranspose _transpose_weights; + NEConcatenateLayer _concat_input_weights; + NEConcatenateLayer _concat_recurrent_weights; + NEConcatenateLayer _concat_weights; + NEConcatenateLayer _concat_inputs; + NEConcatenateLayer _concat_bias; + NEActivationLayer _sigmoid_forget_gate; + NEActivationLayer _sigmoid_input_gate; + NEActivationLayer _sigmoid_output_gate; + NEActivationLayer _tanh_modulation_gate; + NEActivationLayer _tanh_output_state; + NEArithmeticAddition _add1; + NEArithmeticAddition _add2; + NEPixelWiseMultiplication _mul1; + NEPixelWiseMultiplication _mul2; + NEPixelWiseMultiplication _mul3; + NESlice _slice_input_tensor; + NESlice _slice_forget_tensor; + NESlice _slice_cell_tensor; + NESlice _slice_output_tensor; + NEDequantizationLayer _dequantize; + NEQuantizationLayer _quantize; // Tensor pointers const ITensor *_input_to_input_weights; |