author    Manuel Bottini <manuel.bottini@arm.com>  2019-07-17 16:11:53 +0100
committer Manuel Bottini <manuel.bottini@arm.com>  2019-07-22 14:06:55 +0000
commit    10c53f1ef317095ddcd9143bf759cc68ecb0e721 (patch)
tree      644954b909692f1d6b4e20194e81708503a62c2b
parent    d176d54b94c5337c97bd87671ce390804da8c10b (diff)
download  ComputeLibrary-10c53f1ef317095ddcd9143bf759cc68ecb0e721.tar.gz
COMPMID-2307: QUANTIZED_16BIT_LSTM operator for CL
Change-Id: I1b52df359f1a368d585fac43a08496544dd2f86f
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1568
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h    |   4
-rw-r--r--  arm_compute/core/CL/kernels/CLStridedSliceKernel.h            |   6
-rw-r--r--  arm_compute/core/QuantizationInfo.h                           |  12
-rw-r--r--  arm_compute/runtime/CL/CLFunctions.h                          |   1
-rw-r--r--  arm_compute/runtime/CL/functions/CLConcatenateLayer.h         |  10
-rw-r--r--  arm_compute/runtime/CL/functions/CLDequantizationLayer.h      |   4
-rw-r--r--  arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h       | 203
-rw-r--r--  arm_compute/runtime/CL/functions/CLStridedSlice.h             |   6
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h     |   2
-rw-r--r--  src/core/CL/kernels/CLDequantizationLayerKernel.cpp           |   7
-rw-r--r--  src/core/CL/kernels/CLStridedSliceKernel.cpp                  |   2
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp      |   2
-rw-r--r--  src/core/NEON/kernels/NEDequantizationLayerKernel.cpp         |   2
-rw-r--r--  src/runtime/CL/functions/CLConcatenateLayer.cpp               |  28
-rw-r--r--  src/runtime/CL/functions/CLLSTMLayerQuantized.cpp             | 397
-rw-r--r--  src/runtime/NEON/functions/NELSTMLayerQuantized.cpp           |   8
-rw-r--r--  tests/datasets/DatatypeDataset.h                              |   1
-rw-r--r--  tests/validation/CL/BatchConcatenateLayer.cpp                 |   9
-rw-r--r--  tests/validation/CL/DepthConcatenateLayer.cpp                 |   9
-rw-r--r--  tests/validation/CL/LSTMLayerQuantized.cpp                    | 458
-rw-r--r--  tests/validation/CL/WidthConcatenateLayer.cpp                 |   9
-rw-r--r--  tests/validation/NEON/LSTMLayerQuantized.cpp                  |   6
-rw-r--r--  tests/validation/fixtures/DequantizationLayerFixture.h        |  24
-rw-r--r--  tests/validation/reference/DequantizationLayer.cpp            |  11
24 files changed, 1176 insertions, 45 deletions
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
index 6d37f6a1a5..0ee5a13638 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -48,13 +48,13 @@ public:
~CLDequantizationLayerKernel() = default;
/** Set the input, output, min and max.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8/QSYMM16.
* @param[out] output Destination tensor. Data types supported: F16/F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data types supported: F16/F32.
*
* @return a status
diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
index e104dcfdd7..5b69b3fd16 100644
--- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
+++ b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,7 +54,7 @@ public:
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QSYMM16/U32/S32/F16/F32
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
@@ -72,7 +72,7 @@ public:
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QSYMM16/U32/S32/F16/F32
* @param[in] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 587a380d63..79afca0714 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -300,6 +300,18 @@ inline float dequantize(int8_t value, float scale)
return value * scale;
}
+/** Dequantize a value given a symmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] scale Scale to use for dequantization
+ *
+ * @return Dequantized value
+ */
+inline float dequantize(int16_t value, float scale)
+{
+ return value * scale;
+}
+
/** Quantize a value given a 16-bit symmetric quantization scheme
*
* @param[in] value Value to quantize
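Note: the new int16_t overload mirrors the existing 8-bit helpers. Below is a minimal standalone sketch of how it behaves, assuming only the overload added above; the scale 8.f/32768.f matches the qsymm_3 info used later by CLLSTMLayerQuantized.

    #include "arm_compute/core/QuantizationInfo.h"

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        // QSYMM16 with 3 integer bits: scale = 8 / 32768 = 2^-12, no offset
        const float scale = 8.f / 32768.f;

        // Quantizing 1.0f under this scheme gives round(1.0f / scale) = 4096
        const auto q = static_cast<std::int16_t>(std::lround(1.0f / scale));

        // The overload added in this patch recovers the real value as q * scale
        const float deq = arm_compute::dequantize(q, scale);

        std::cout << q << " -> " << deq << std::endl; // 4096 -> 1
        return 0;
    }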
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 8c154f2059..922fb6acd9 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -94,6 +94,7 @@
#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
#include "arm_compute/runtime/CL/functions/CLLSTMLayer.h"
+#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"
#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index b69930c7d3..fb9724d167 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -60,7 +60,8 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
- void configure(const std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
+ void configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
+ void configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
/** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
@@ -73,11 +74,18 @@ public:
* @return a status
*/
static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
// Inherited methods overridden:
void run() override;
private:
+ template <typename TensorType>
+ void configure_internal(std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis);
+
+ template <typename TensorInfoType>
+ static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
+
std::vector<std::unique_ptr<ICLKernel>> _concat_kernels;
unsigned int _num_inputs;
unsigned int _axis;
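Note: the extra overloads let callers that only hold const tensor pointers (such as the weight concatenations in CLLSTMLayerQuantized below) reuse the same function. A minimal usage sketch of the const variant, assuming w0..w3 and concatenated_weights are hypothetical, already-initialised QASYMM8 tensors:

    // Concatenate four weight matrices along the second dimension (axis 1 == Window::DimY).
    std::vector<const arm_compute::ICLTensor *> weights_vector;
    weights_vector.emplace_back(&w0);
    weights_vector.emplace_back(&w1);
    weights_vector.emplace_back(&w2);
    weights_vector.emplace_back(&w3);

    arm_compute::CLConcatenateLayer concat;
    concat.configure(weights_vector, &concatenated_weights, arm_compute::Window::DimY);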
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 2f7af01a84..ade589d79e 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -40,13 +40,13 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
- * Data types supported: QASYMM8/QSYMM8.
+ * Data types supported: QASYMM8/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data type supported: F16/F32.
*
* @return a status
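Note: with QSYMM16 support the same function can turn the LSTM's 16-bit internal state back into floats. A minimal usage sketch, assuming an initialised CL scheduler and an arbitrary 3D shape:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"

    using namespace arm_compute;

    void dequantize_qsymm16_example()
    {
        CLScheduler::get().default_init();

        CLTensor src;
        CLTensor dst;
        // QSYMM16 input with scale 16/32768 (the cell-state quantization used by CLLSTMLayerQuantized)
        src.allocator()->init(TensorInfo(TensorShape(4U, 2U, 2U), 1, DataType::QSYMM16, QuantizationInfo(16.f / 32768.f)));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 2U, 2U), 1, DataType::F32));

        CLDequantizationLayer dequant;
        dequant.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with quantized values ...
        dequant.run();
        CLScheduler::get().sync();
    }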
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
new file mode 100644
index 0000000000..e2d164c395
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H__
+#define __ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+
+#include "arm_compute/runtime/common/LSTMParams.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Basic function to run @ref CLLSTMLayerQuantized
+ *
+ * This function calls the following CL functions/kernels:
+ *
+ * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
+ * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
+ * -# @ref CLTranspose Matrix transpose
+ * -# @ref CLConcatenateLayer Tensor concatenation
+ * -# @ref CLActivationLayer Activation functions (tanh and logistic)
+ * -# @ref CLArithmeticAddition Elementwise addition
+ * -# @ref CLPixelWiseMultiplication Elementwise multiplication
+ * -# @ref CLSlice Tensor slicing
+ * -# @ref CLDequantizationLayer Dequantize into float
+ * -# @ref CLQuantizationLayer Quantize from float
+ * */
+class CLLSTMLayerQuantized : public IFunction
+{
+public:
+ /** Default constructor */
+ CLLSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLSTMLayerQuantized(const CLLSTMLayerQuantized &) = delete;
+ /** Default move constructor */
+ CLLSTMLayerQuantized(CLLSTMLayerQuantized &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLSTMLayerQuantized &operator=(const CLLSTMLayerQuantized &) = delete;
+ /** Default move assignment operator */
+ CLLSTMLayerQuantized &operator=(CLLSTMLayerQuantized &&) = default;
+ /** Initialize function's tensors.
+ *
+ * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
+ * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32.
+ * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32.
+ * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32.
+ * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32.
+ * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
+ * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+ * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
+ * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
+ */
+ void configure(const ICLTensor *input,
+ const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+ const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+ const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+ ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+ ICLTensor *cell_state_out, ICLTensor *output_state_out);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized
+ *
+ * @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
+ * @param[in] input_to_input_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_to_forget_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_to_cell_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_to_output_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_input_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_cell_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+ * @param[in] input_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32.
+ * @param[in] forget_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32.
+ * @param[in] cell_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32.
+ * @param[in] output_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32.
+ * @param[in] cell_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
+ * @param[in] output_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+ * @param[out] cell_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
+ * @param[out] output_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ CLMemoryGroup _memory_group;
+
+ // Functions used
+ CLGEMMLowpMatrixMultiplyCore _gemmlowp;
+ CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage;
+ CLTranspose _transpose_weights;
+ CLConcatenateLayer _concat_input_weights;
+ CLConcatenateLayer _concat_recurrent_weights;
+ CLConcatenateLayer _concat_weights;
+ CLConcatenateLayer _concat_inputs;
+ CLConcatenateLayer _concat_bias;
+ CLActivationLayer _sigmoid_forget_gate;
+ CLActivationLayer _sigmoid_input_gate;
+ CLActivationLayer _sigmoid_output_gate;
+ CLActivationLayer _tanh_modulation_gate;
+ CLActivationLayer _tanh_output_state;
+ CLArithmeticAddition _add_cell_state_tmps;
+ CLArithmeticAddition _add2;
+ CLPixelWiseMultiplication _mul_forget_gate_cell_state;
+ CLPixelWiseMultiplication _mul_input_gate_input_mod_gate;
+ CLPixelWiseMultiplication _mul_output_state_tmp_output_gate;
+ CLSlice _slice_input_tensor;
+ CLSlice _slice_forget_tensor;
+ CLSlice _slice_cell_tensor;
+ CLSlice _slice_output_tensor;
+ CLDequantizationLayer _dequantize;
+ CLQuantizationLayer _quantize;
+
+ // Tensor pointers
+ const ICLTensor *_input_to_input_weights;
+ const ICLTensor *_input_to_forget_weights;
+ const ICLTensor *_input_to_cell_weights;
+ const ICLTensor *_input_to_output_weights;
+ const ICLTensor *_recurrent_to_input_weights;
+ const ICLTensor *_recurrent_to_forget_weights;
+ const ICLTensor *_recurrent_to_cell_weights;
+ const ICLTensor *_recurrent_to_output_weights;
+ const ICLTensor *_input_gate_bias;
+ const ICLTensor *_forget_gate_bias;
+ const ICLTensor *_cell_bias;
+ const ICLTensor *_output_gate_bias;
+
+ // Temporary tensors
+ CLTensor _recurrent_weights;
+ CLTensor _input_weights;
+ CLTensor _weights;
+ CLTensor _input;
+ CLTensor _weights_transposed;
+ CLTensor _output_highp;
+ CLTensor _output_lowp;
+ CLTensor _bias;
+ CLTensor _forget_gate_input;
+ CLTensor _input_gate_input;
+ CLTensor _output_gate_input;
+ CLTensor _input_modulation_gate_input;
+ CLTensor _forget_gate_output;
+ CLTensor _input_gate_output;
+ CLTensor _output_gate_output;
+ CLTensor _input_modulation_gate_output;
+ CLTensor _cell_state_tmp1;
+ CLTensor _cell_state_tmp2;
+ CLTensor _output_state_tmp;
+ CLTensor _output_state_out_symm;
+ CLTensor _output_state_out_f32;
+
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H__ */
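Note: a minimal configuration sketch for the new function, not a complete program. Sizes match the small integration test further down (input_size = 2, output_size = 4, batch_size = 2); the weight tensors (QASYMM8) and bias tensors (S32) are assumed to be created in the same way and are omitted for brevity.

    using namespace arm_compute;

    const int input_size  = 2;
    const int output_size = 4;
    const int batch_size  = 2;

    const QuantizationInfo qasymm(1.f / 128.f, 128);    // input and output state
    const QuantizationInfo qsymm_4(16.f / 32768.f, 0);  // cell state: QSYMM16 with 4 integer bits

    CLTensor input, output_state_in, cell_state_in, output_state_out, cell_state_out;
    input.allocator()->init(TensorInfo(TensorShape(input_size, batch_size), 1, DataType::QASYMM8, qasymm));
    output_state_in.allocator()->init(TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QASYMM8, qasymm));
    cell_state_in.allocator()->init(TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_4));
    output_state_out.allocator()->init(TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QASYMM8, qasymm));
    cell_state_out.allocator()->init(TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_4));

    // input_to_*_weights: [input_size, output_size], recurrent_to_*_weights: [output_size, output_size],
    // *_gate_bias: [output_size] in S32 -- assumed to be set up elsewhere.
    CLLSTMLayerQuantized lstmq;
    lstmq.configure(&input,
                    &input_to_input_weights, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights,
                    &recurrent_to_input_weights, &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights,
                    &input_gate_bias, &forget_gate_bias, &cell_bias, &output_gate_bias,
                    &cell_state_in, &output_state_in, &cell_state_out, &output_state_out);

    // After allocating and filling all tensors, one LSTM step is executed with:
    lstmq.run();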
diff --git a/arm_compute/runtime/CL/functions/CLStridedSlice.h b/arm_compute/runtime/CL/functions/CLStridedSlice.h
index 4a336f6fdc..bb97b17fea 100644
--- a/arm_compute/runtime/CL/functions/CLStridedSlice.h
+++ b/arm_compute/runtime/CL/functions/CLStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ public:
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QSYMM16/U32/S32/F16/F32
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
@@ -57,7 +57,7 @@ public:
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QSYMM16/U32/S32/F16/F32
* @param[in] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
index b45d714990..7f02988c19 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
@@ -53,7 +53,7 @@ class ITensor;
* -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
* -# @ref NETranspose Matrix transpose
* -# @ref NEConcatenateLayer Tensor concatenation
- * -# @ref NEActivationLayer Activation functions (tanh and logistig)
+ * -# @ref NEActivationLayer Activation functions (tanh and logistic)
* -# @ref NEArithmeticAddition Elementwise addition
* -# @ref NEPixelWiseMultiplication Elementwise multiplication
* -# @ref NESlice Tensor slicing
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index e383bc475d..12d36cdb9f 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -33,14 +33,14 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8, DataType::QSYMM16);
if(output->tensor_shape().total_size() > 0)
{
@@ -135,3 +135,4 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu
}
while(window_collapsed.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index c2bdf7f299..9dd488b678 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -48,7 +48,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
+ DataType::U16, DataType::S16, DataType::QSYMM16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
index a3ac102564..4e673a9f38 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
@@ -84,7 +84,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, input3, input4, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::F16, DataType::U32,
- DataType::F32);
+ DataType::S32, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, input3, input4, output);
ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) + input3->dimension(0) + input4->dimension(0) > output->dimension(0));
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index d11f04a82f..e52f53ea04 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -194,7 +194,7 @@ void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Wind
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- uint8_t val = *(in_ptr + x);
+ int8_t val = *(in_ptr + x);
*(out_ptr + x) = static_cast<T>(dequantize(val, scale));
}
},
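Note: the left-over loop fix above matters because QSYMM8 values are signed; reading them through uint8_t reinterprets negative codes as large positive ones before the dequantize() multiply. A small standalone illustration of the difference:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        const float scale = 0.1f;
        const std::int8_t stored = -1; // QSYMM8 code for -0.1 at this scale

        // Reading the same byte as unsigned yields 255 instead of -1
        const std::uint8_t misread = static_cast<std::uint8_t>(stored);

        std::cout << stored * scale << " vs " << misread * scale << std::endl; // -0.1 vs 25.5
        return 0;
    }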
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp
index 1d396f5ebf..5d224db8e9 100644
--- a/src/runtime/CL/functions/CLConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp
@@ -47,14 +47,35 @@ CLConcatenateLayer::CLConcatenateLayer()
{
}
-void CLConcatenateLayer::configure(const std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
+void CLConcatenateLayer::configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
+{
+ configure_internal(std::move(inputs_vector), output, axis);
+}
+
+void CLConcatenateLayer::configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
+{
+ configure_internal(std::move(inputs_vector), output, axis);
+}
+
+Status CLConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+{
+ return validate_internal(inputs_vector, output, axis);
+}
+
+Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+{
+ return validate_internal(inputs_vector, output, axis);
+}
+
+template <typename TensorType>
+void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis)
{
ARM_COMPUTE_ERROR_ON(output == nullptr);
_axis = axis;
_num_inputs = inputs_vector.size();
std::vector<ITensorInfo *> inputs_vector_info(inputs_vector.size());
- std::transform(inputs_vector.begin(), inputs_vector.end(), inputs_vector_info.begin(), [](ICLTensor * t)
+ std::transform(inputs_vector.begin(), inputs_vector.end(), inputs_vector_info.begin(), [](TensorType * t)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(t);
return t->info();
@@ -141,7 +162,8 @@ void CLConcatenateLayer::configure(const std::vector<ICLTensor *> &inputs_vector
}
}
-Status CLConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+template <typename TensorInfoType>
+Status CLConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr);
const unsigned int num_inputs = inputs_vector.size();
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
new file mode 100644
index 0000000000..e0006a77d0
--- /dev/null
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"
+
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+#include <cmath>
+#include <memory>
+#include <tuple>
+
+namespace arm_compute
+{
+namespace
+{
+// Quantization info structures used in the LSTMQuantize layer
+const QuantizationInfo qasymm(1.f / 128.f, 128);
+const QuantizationInfo qsymm_3(8.f / 32768.f, 0); // qsymm16 with 3 integer bit
+const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // qsymm16 with 4 integer bit
+const QuantizationInfo qsymm_0(1.f / 32768.f, 0); // qsymm16 with 0 integer bit
+} // namespace
+
+CLLSTMLayerQuantized::CLLSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
+ _concat_bias(), _sigmoid_forget_gate(), _sigmoid_input_gate(), _sigmoid_output_gate(), _tanh_modulation_gate(), _tanh_output_state(), _add_cell_state_tmps(), _add2(), _mul_forget_gate_cell_state(),
+ _mul_input_gate_input_mod_gate(), _mul_output_state_tmp_output_gate(), _slice_input_tensor(), _slice_forget_tensor(), _slice_cell_tensor(), _slice_output_tensor(), _dequantize(), _quantize(),
+ _input_to_input_weights(nullptr), _input_to_forget_weights(nullptr), _input_to_cell_weights(nullptr), _input_to_output_weights(nullptr), _recurrent_to_input_weights(nullptr),
+ _recurrent_to_forget_weights(nullptr), _recurrent_to_cell_weights(nullptr), _recurrent_to_output_weights(nullptr), _input_gate_bias(nullptr), _forget_gate_bias(nullptr), _cell_bias(nullptr),
+ _output_gate_bias(nullptr), _recurrent_weights(), _input_weights(), _weights(), _input(), _weights_transposed(), _output_highp(), _output_lowp(), _bias(), _forget_gate_input(), _input_gate_input(),
+ _output_gate_input(), _input_modulation_gate_input(), _forget_gate_output(), _input_gate_output(), _output_gate_output(), _input_modulation_gate_output(), _cell_state_tmp1(), _cell_state_tmp2(),
+ _output_state_tmp(), _output_state_out_symm(), _output_state_out_f32(), _is_prepared(false)
+{
+}
+
+void CLLSTMLayerQuantized::configure(const ICLTensor *input,
+ const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+ const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+ const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+ ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+ ICLTensor *cell_state_out, ICLTensor *output_state_out)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
+ recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
+ input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
+
+ ARM_COMPUTE_ERROR_THROW_ON(CLLSTMLayerQuantized::validate(input->info(), input_to_input_weights->info(), input_to_forget_weights->info(), input_to_cell_weights->info(),
+ input_to_output_weights->info(),
+ recurrent_to_input_weights->info(), recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
+ input_gate_bias->info(), forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(), cell_state_in->info(), output_state_in->info(), cell_state_out->info(), output_state_out->info()));
+
+ const int input_size = input->info()->dimension(0);
+ const int batch_size = input->info()->dimension(1);
+ const int output_size = input_to_input_weights->info()->dimension(1);
+
+ const QuantizationInfo qweights = input_to_input_weights->info()->quantization_info(); // Weights quantization
+
+ auto_init_if_empty(*cell_state_out->info(), TensorInfo(TensorShape(batch_size, output_size), 1, DataType::QSYMM16, qsymm_4));
+ auto_init_if_empty(*output_state_out->info(), TensorInfo(TensorShape(batch_size, output_size), 1, DataType::QASYMM8, qasymm));
+
+ _input_to_input_weights = input_to_input_weights;
+ _input_to_forget_weights = input_to_forget_weights;
+ _input_to_cell_weights = input_to_cell_weights;
+ _input_to_output_weights = input_to_output_weights;
+ _recurrent_to_input_weights = recurrent_to_input_weights;
+ _recurrent_to_forget_weights = recurrent_to_forget_weights;
+ _recurrent_to_cell_weights = recurrent_to_cell_weights;
+ _recurrent_to_output_weights = recurrent_to_output_weights;
+ _input_gate_bias = input_gate_bias;
+ _forget_gate_bias = forget_gate_bias;
+ _cell_bias = cell_bias;
+ _output_gate_bias = output_gate_bias;
+
+ // Weights concatenation
+ std::vector<const ICLTensor *> inputs_weights_vector;
+ inputs_weights_vector.emplace_back(input_to_input_weights);
+ inputs_weights_vector.emplace_back(input_to_forget_weights);
+ inputs_weights_vector.emplace_back(input_to_cell_weights);
+ inputs_weights_vector.emplace_back(input_to_output_weights);
+
+ std::vector<const ICLTensor *> recurrent_weights_vector;
+ recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
+ recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
+ recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
+ recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
+
+ _input_weights.allocator()->init(TensorInfo(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
+ _concat_input_weights.configure(inputs_weights_vector, &_input_weights, Window::DimY);
+
+ _recurrent_weights.allocator()->init(TensorInfo(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
+ _concat_recurrent_weights.configure(recurrent_weights_vector, &_recurrent_weights, Window::DimY);
+
+ std::vector<const ICLTensor *> weights_vector;
+ weights_vector.emplace_back(&_recurrent_weights);
+ weights_vector.emplace_back(&_input_weights);
+
+ _weights.allocator()->init(TensorInfo(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
+ _concat_weights.configure(weights_vector, &_weights, Window::DimX);
+ _transpose_weights.configure(&_weights, &_weights_transposed);
+
+ // Input concatenation
+ std::vector<const ICLTensor *> input_vector;
+ input_vector.emplace_back(input);
+ input_vector.emplace_back(output_state_in);
+
+ _memory_group.manage(&_input);
+ _input.allocator()->init(TensorInfo(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm));
+ _concat_inputs.configure(input_vector, &_input, Window::DimX);
+
+ // Bias concatenation
+ std::vector<const ICLTensor *> bias_vector;
+ bias_vector.emplace_back(input_gate_bias);
+ bias_vector.emplace_back(forget_gate_bias);
+ bias_vector.emplace_back(cell_bias);
+ bias_vector.emplace_back(output_gate_bias);
+
+ _bias.allocator()->init(TensorInfo(TensorShape(4 * output_size), 1, DataType::S32));
+ _concat_bias.configure(bias_vector, &_bias, Window::DimX);
+
+ // Invert the offset for gemmlowp
+ _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset));
+ _weights_transposed.info()->set_quantization_info(QuantizationInfo(qweights.uniform().scale, -qweights.uniform().offset));
+
+ // Run gemmlowp
+ _memory_group.manage(&_output_highp);
+ _output_highp.allocator()->init(TensorInfo(TensorShape(4 * output_size, batch_size), 1, DataType::S32));
+ _gemmlowp.configure(&_input, &_weights_transposed, nullptr, &_output_highp);
+ _input.allocator()->allocate();
+
+ // Set the offset back
+ _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
+ _weights_transposed.info()->set_quantization_info(QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));
+
+ // multiplier = (input_scale * weights_scale) / output_scale (2 ^ (-12))
+ _output_lowp.allocator()->init(TensorInfo(_output_highp.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_3));
+
+ const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+
+ quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+
+ _memory_group.manage(&_output_lowp);
+ _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
+ _output_highp.allocator()->allocate();
+ _bias.allocator()->allocate();
+
+ // Get the gate tensors
+ _memory_group.manage(&_input_gate_input);
+ _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
+ _memory_group.manage(&_forget_gate_input);
+ _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
+ _memory_group.manage(&_input_modulation_gate_input);
+ _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
+ _memory_group.manage(&_output_gate_input);
+ _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
+ _output_lowp.allocator()->allocate();
+
+ // Forget gate
+ _memory_group.manage(&_forget_gate_output);
+ _forget_gate_output.allocator()->init(TensorInfo(_forget_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
+ _sigmoid_forget_gate.configure(&_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ _forget_gate_input.allocator()->allocate();
+
+ // Input gate
+ _memory_group.manage(&_input_gate_output);
+ _input_gate_output.allocator()->init(TensorInfo(_input_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
+ _sigmoid_input_gate.configure(&_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ _input_gate_input.allocator()->allocate();
+
+ // Input modulation gate equation
+ _memory_group.manage(&_input_modulation_gate_output);
+ _input_modulation_gate_output.allocator()->init(TensorInfo(_input_modulation_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
+ _tanh_modulation_gate.configure(&_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+ _input_modulation_gate_input.allocator()->allocate();
+
+ // Output gate
+ _memory_group.manage(&_output_gate_output);
+ _output_gate_output.allocator()->init(TensorInfo(_output_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
+ _sigmoid_output_gate.configure(&_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ _output_gate_input.allocator()->allocate();
+
+ // Long term memory
+ _memory_group.manage(&_cell_state_tmp1);
+ _cell_state_tmp1.allocator()->init(TensorInfo(_forget_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
+ _mul_forget_gate_cell_state.configure(&_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ _forget_gate_output.allocator()->allocate();
+
+ _memory_group.manage(&_cell_state_tmp2);
+ _cell_state_tmp2.allocator()->init(TensorInfo(_input_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
+ _mul_input_gate_input_mod_gate.configure(&_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ _input_modulation_gate_output.allocator()->allocate();
+ _input_gate_output.allocator()->allocate();
+
+ _add_cell_state_tmps.configure(&_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
+ _cell_state_tmp1.allocator()->allocate();
+ _cell_state_tmp2.allocator()->allocate();
+
+ // Short term memory
+ _memory_group.manage(&_output_state_tmp);
+ _output_state_tmp.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
+ _tanh_output_state.configure(cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+
+ _memory_group.manage(&_output_state_out_symm);
+ _output_state_out_symm.allocator()->init(TensorInfo(_output_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
+ _mul_output_state_tmp_output_gate.configure(&_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ _output_gate_output.allocator()->allocate();
+ _output_state_tmp.allocator()->allocate();
+
+ // Requantize the output state from QSYMM16 to QASYMM8
+ _memory_group.manage(&_output_state_out_f32);
+ _output_state_out_f32.allocator()->init(TensorInfo(_output_state_out_symm.info()->tensor_shape(), 1, DataType::F32));
+ _dequantize.configure(&_output_state_out_symm, &_output_state_out_f32);
+ _output_state_out_symm.allocator()->allocate();
+
+ _quantize.configure(&_output_state_out_f32, output_state_out);
+ _output_state_out_f32.allocator()->allocate();
+}
+
+Status CLLSTMLayerQuantized::validate(const ITensorInfo *input,
+ const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
+ const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
+ const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
+ const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
+ const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+ recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in,
+ output_state_in, cell_state_out, output_state_out);
+
+ const int input_size = input->dimension(0);
+ const int batch_size = input->dimension(1);
+ const int output_size = input_to_input_weights->dimension(1);
+
+ // Dimensionality checks
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON(input_to_input_weights->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON(input_gate_bias->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);
+
+ TensorInfo input_weights_info(input_to_input_weights->clone()->set_tensor_shape(TensorShape(input_size, output_size)).set_data_type(DataType::QASYMM8));
+ TensorInfo recurrent_weights_info(input_to_input_weights->clone()->set_tensor_shape(TensorShape(output_size, output_size)).set_data_type(DataType::QASYMM8));
+ TensorInfo bias_info(input_gate_bias->clone()->set_tensor_shape(TensorShape(output_size)).set_data_type(DataType::S32));
+ TensorInfo output_state_info(cell_state_in->clone()->set_tensor_shape(TensorShape(output_size, batch_size)).set_data_type(DataType::QASYMM8).set_quantization_info(qasymm));
+ TensorInfo cell_state_info(cell_state_in->clone()->set_tensor_shape(TensorShape(output_size, batch_size)).set_data_type(DataType::QSYMM16).set_quantization_info(qsymm_4));
+
+ // Shape checks
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input_weights_info, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&recurrent_weights_info, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&bias_info, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&cell_state_info, cell_state_in);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&output_state_info, output_state_in);
+
+ // Data type checks
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input_weights_info, input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&recurrent_weights_info, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&bias_info, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&cell_state_info, cell_state_in);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&output_state_info, output_state_in);
+
+ // Quantization checks
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&cell_state_info, cell_state_in);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&output_state_info, output_state_in);
+
+ if(cell_state_out->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&cell_state_info, cell_state_out);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&cell_state_info, cell_state_out);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&cell_state_info, cell_state_out);
+ }
+
+ if(output_state_out->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&output_state_info, output_state_out);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&output_state_info, output_state_out);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&output_state_info, output_state_out);
+ }
+
+ return Status{};
+}
+
+void CLLSTMLayerQuantized::run()
+{
+ prepare();
+
+ // Acquire all the temporaries
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Concat and transpose the input
+ _concat_inputs.run();
+
+ // Run gemmlowp
+ _gemmlowp.run();
+ _output_stage.run();
+
+ // Slice the results
+ _slice_input_tensor.run();
+ _slice_forget_tensor.run();
+ _slice_cell_tensor.run();
+ _slice_output_tensor.run();
+
+ // Gates
+ // Forget gate
+ _sigmoid_forget_gate.run();
+
+ // Input gate
+ _sigmoid_input_gate.run();
+
+ // Input modulation gate
+ _tanh_modulation_gate.run();
+
+ // Output gate
+ _sigmoid_output_gate.run();
+
+ // Cell state (long term memory)
+ _mul_forget_gate_cell_state.run();
+ _mul_input_gate_input_mod_gate.run();
+ _add_cell_state_tmps.run();
+
+ // Output state (short term memory)
+ _tanh_output_state.run();
+ _mul_output_state_tmp_output_gate.run();
+
+ // Requantize output state from QSYMM16 to QASYMM8
+ _dequantize.run();
+ _quantize.run();
+}
+
+void CLLSTMLayerQuantized::prepare()
+{
+ if(!_is_prepared)
+ {
+ _input_weights.allocator()->allocate();
+ _concat_input_weights.run();
+
+ _input_to_input_weights->mark_as_unused();
+ _input_to_forget_weights->mark_as_unused();
+ _input_to_cell_weights->mark_as_unused();
+ _input_to_output_weights->mark_as_unused();
+
+ _recurrent_weights.allocator()->allocate();
+ _concat_recurrent_weights.run();
+ _recurrent_to_input_weights->mark_as_unused();
+ _recurrent_to_forget_weights->mark_as_unused();
+ _recurrent_to_cell_weights->mark_as_unused();
+ _recurrent_to_output_weights->mark_as_unused();
+
+ _weights.allocator()->allocate();
+ _concat_weights.run();
+
+ _input_weights.mark_as_unused();
+ _input_weights.allocator()->free();
+ _recurrent_weights.mark_as_unused();
+ _recurrent_weights.allocator()->free();
+
+ _weights_transposed.allocator()->allocate();
+ _transpose_weights.run();
+
+ _weights.mark_as_unused();
+ _weights.allocator()->free();
+
+ _bias.allocator()->allocate();
+ _concat_bias.run();
+ _input_gate_bias->mark_as_unused();
+ _forget_gate_bias->mark_as_unused();
+ _cell_bias->mark_as_unused();
+ _output_gate_bias->mark_as_unused();
+
+ _is_prepared = true;
+ }
+}
+
+} // namespace arm_compute
\ No newline at end of file
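Note on the output-stage scaling used above: qsymm_3 has scale 8/32768 = 2^-12, so mapping the S32 accumulator (whose effective scale is input_scale * weights_scale) onto it needs a real multiplier of input_scale * weights_scale / 2^-12, i.e. the 4096.f * qasymm.scale * qweights.scale computed in configure(). The following standalone check only illustrates how that value decomposes into a normalised multiplier and right shift under the 1/128 scales used by the tests; in the library the split is done by quantization::calculate_quantized_multiplier_less_than_one().

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        const float input_scale   = 1.f / 128.f;
        const float weights_scale = 1.f / 128.f;
        const float output_scale  = 8.f / 32768.f; // qsymm_3, i.e. 2^-12

        // Same value as 4096.f * input_scale * weights_scale in configure(): 0.25 here
        const float multiplier = input_scale * weights_scale / output_scale;

        // Decompose into mantissa * 2^exponent with the mantissa in [0.5, 1)
        int         exponent = 0;
        const float mantissa = std::frexp(multiplier, &exponent); // 0.5, exponent = -1

        const int  right_shift = -exponent;                                                      // 1
        const auto fixed_point = static_cast<std::int32_t>(std::lround(mantissa * (1ll << 31))); // 2^30

        std::cout << multiplier << " == " << fixed_point << " * 2^-(31 + " << right_shift << ")" << std::endl;
        return 0;
    }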
diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
index 05e05a5e57..6cfa9887ff 100644
--- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
@@ -240,7 +240,7 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);
TensorInfo input_weights_info(input_to_input_weights->clone()->set_tensor_shape(TensorShape(input_size, output_size)).set_data_type(DataType::QASYMM8));
- TensorInfo recurrent_weights_info(input_to_input_weights->clone()->set_tensor_shape(TensorShape(output_size, output_size)).set_data_type(DataType::QASYMM8).set_quantization_info(qasymm));
+ TensorInfo recurrent_weights_info(input_to_input_weights->clone()->set_tensor_shape(TensorShape(output_size, output_size)).set_data_type(DataType::QASYMM8));
TensorInfo bias_info(input_gate_bias->clone()->set_tensor_shape(TensorShape(output_size)).set_data_type(DataType::S32));
TensorInfo output_state_info(cell_state_in->clone()->set_tensor_shape(TensorShape(output_size, batch_size)).set_data_type(DataType::QASYMM8).set_quantization_info(qasymm));
TensorInfo cell_state_info(cell_state_in->clone()->set_tensor_shape(TensorShape(output_size, batch_size)).set_data_type(DataType::QSYMM16).set_quantization_info(qsymm_4));
@@ -254,14 +254,14 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
// Data type checks
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input_weights_info, input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&recurrent_weights_info, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&bias_info, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&cell_state_info, cell_state_in);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&output_state_info, output_state_in);
// Quantization checks
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&input_weights_info, input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&recurrent_weights_info, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&input_weights_info, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&cell_state_info, cell_state_in);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&output_state_info, output_state_in);
diff --git a/tests/datasets/DatatypeDataset.h b/tests/datasets/DatatypeDataset.h
index bb2774b4b3..a158a5f52d 100644
--- a/tests/datasets/DatatypeDataset.h
+++ b/tests/datasets/DatatypeDataset.h
@@ -43,6 +43,7 @@ public:
{
DataType::QSYMM8,
DataType::QASYMM8,
+ DataType::QSYMM16,
})
{
}
diff --git a/tests/validation/CL/BatchConcatenateLayer.cpp b/tests/validation/CL/BatchConcatenateLayer.cpp
index b789569155..6c4ffee1dc 100644
--- a/tests/validation/CL/BatchConcatenateLayer.cpp
+++ b/tests/validation/CL/BatchConcatenateLayer.cpp
@@ -97,9 +97,12 @@ TEST_CASE(Configuration, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
// Create and configure function
- CLConcatenateLayer concat_layer;
-
- concat_layer.configure({ &src1, &src2, &src3 }, &dst, 3);
+ CLConcatenateLayer concat_layer;
+ std::vector<ICLTensor *> inputs;
+ inputs.emplace_back(&src1);
+ inputs.emplace_back(&src2);
+ inputs.emplace_back(&src3);
+ concat_layer.configure(inputs, &dst, 3);
}
template <typename T>
using CLBatchConcatenateLayerFixture = ConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLConcatenateLayer, T>;
diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp
index 8cbfda382b..c67ed05ecd 100644
--- a/tests/validation/CL/DepthConcatenateLayer.cpp
+++ b/tests/validation/CL/DepthConcatenateLayer.cpp
@@ -94,9 +94,12 @@ TEST_CASE(Configuration, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
// Create and configure function
- CLConcatenateLayer concat_layer;
-
- concat_layer.configure({ &src1, &src2, &src3 }, &dst, 2);
+ CLConcatenateLayer concat_layer;
+ std::vector<ICLTensor *> inputs;
+ inputs.emplace_back(&src1);
+ inputs.emplace_back(&src2);
+ inputs.emplace_back(&src3);
+ concat_layer.configure(inputs, &dst, 2);
}
template <typename T>
using CLDepthConcatenateLayerFixture = ConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLConcatenateLayer, T>;
diff --git a/tests/validation/CL/LSTMLayerQuantized.cpp b/tests/validation/CL/LSTMLayerQuantized.cpp
new file mode 100644
index 0000000000..1fc0af1ecb
--- /dev/null
+++ b/tests/validation/CL/LSTMLayerQuantized.cpp
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/Utils.h"
+#include "tests/datasets/LSTMLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename T>
+inline void fill_tensor(CLTensor &tensor, const std::vector<T> &v)
+{
+ tensor.map(true);
+ // Import memory accounting for padding
+ TensorShape t_shape = tensor.info()->tensor_shape();
+ Window window;
+ window.use_tensor_dimensions(t_shape);
+ Iterator out(&tensor, window);
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ *reinterpret_cast<T *>(out.ptr()) = v[coord2index(t_shape, id)];
+ },
+ out);
+ tensor.unmap();
+}
+
+template <typename T>
+inline void fill_tensor(SimpleTensor<T> &tensor, const std::vector<T> &v)
+{
+ std::memcpy(tensor.data(), v.data(), sizeof(T) * v.size());
+}
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(LSTMLayerQuantized)
+
+// *INDENT-OFF*
+// clang-format off
+TEST_CASE(IntegrationTestCaseSmall, framework::DatasetMode::PRECOMMIT)
+{
+ const int batch_size = 2;
+ const int input_size = 2;
+ const int output_size = 4;
+
+
+ QuantizationInfo qasymm(1.f / 128.f, 128);
+ QuantizationInfo qweights(1.f / 128.f, 128);
+ QuantizationInfo qsymm_3(8.f / 32768.f, 0);
+ QuantizationInfo qsymm_4(16.f / 32768.f, 0);
+
+ TensorShape input_shape{ input_size, batch_size };
+ TensorShape input_weights_shape{ input_size, output_size };
+ TensorShape recurrent_weights_shape{ output_size, output_size };
+ TensorShape output_shape{ output_size, batch_size};
+ TensorShape bias_shape{ output_size };
+
+ auto input_to_input_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_to_forget_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_to_cell_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_to_output_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_input_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_forget_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_cell_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_output_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+ auto forget_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+ auto cell_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+ auto output_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+
+ // LSTM input
+ auto input = create_tensor<CLTensor>(input_shape, DataType::QASYMM8, 1, qasymm);
+
+ // LSTM output state
+ auto output_state = create_tensor<CLTensor>(output_shape, DataType::QASYMM8, 1, qasymm);
+
+ // LSTM cell state
+ auto cell_state = create_tensor<CLTensor>(output_shape, DataType::QSYMM16, 1, qsymm_4);
+
+ CLLSTMLayerQuantized lstmq;
+
+ lstmq.configure(&input, &input_to_input_weights, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights,
+ &recurrent_to_input_weights, &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights,
+ &input_gate_bias, &forget_gate_bias, &cell_gate_bias, &output_gate_bias, &cell_state, &output_state, &cell_state, &output_state);
+
+ input.allocator()->allocate();
+ input_to_input_weights.allocator()->allocate();
+ input_to_forget_weights.allocator()->allocate();
+ input_to_cell_weights.allocator()->allocate();
+ input_to_output_weights.allocator()->allocate();
+ recurrent_to_input_weights.allocator()->allocate();
+ recurrent_to_forget_weights.allocator()->allocate();
+ recurrent_to_cell_weights.allocator()->allocate();
+ recurrent_to_output_weights.allocator()->allocate();
+ input_gate_bias.allocator()->allocate();
+ forget_gate_bias.allocator()->allocate();
+ cell_gate_bias.allocator()->allocate();
+ output_gate_bias.allocator()->allocate();
+ cell_state.allocator()->allocate();
+ output_state.allocator()->allocate();
+
+ // Fill weights and biases
+ fill_tensor(input_to_input_weights, std::vector<uint8_t>{ 47, 168,
+ 66, 239,
+ 6, 42,
+ 237, 236 });
+
+ fill_tensor(input_to_forget_weights, std::vector<uint8_t> { 204, 193,
+ 148, 59,
+ 113, 17,
+ 66, 197 });
+
+ fill_tensor(input_to_cell_weights, std::vector<uint8_t> { 172, 101,
+ 184, 209,
+ 165, 82,
+ 108, 209 });
+
+ fill_tensor(input_to_output_weights, std::vector<uint8_t> { 203, 244,
+ 219, 114,
+ 130, 16,
+ 163, 222 });
+
+ fill_tensor(recurrent_to_input_weights, std::vector<uint8_t> { 162, 168, 7, 95,
+ 91, 155, 108, 216,
+ 255, 100, 48, 188,
+ 58, 37, 186, 147 });
+
+ fill_tensor(recurrent_to_forget_weights, std::vector<uint8_t> { 46, 58, 47, 170,
+ 246, 96, 12, 99,
+ 68, 23, 186, 161,
+ 237, 164, 89, 6 });
+
+ fill_tensor(recurrent_to_cell_weights, std::vector<uint8_t> { 234, 99, 71, 206,
+ 205, 159, 64, 253,
+ 191, 148, 116, 8,
+ 209, 136, 59, 138 });
+
+ fill_tensor(recurrent_to_output_weights, std::vector<uint8_t> { 23, 241, 137, 36,
+ 206, 5, 227, 56,
+ 254, 176, 231, 47,
+ 18, 201, 161, 11 });
+
+ fill_tensor(input_gate_bias, std::vector<int> {-103038, 30525, 115255, -38154 });
+ fill_tensor(forget_gate_bias, std::vector<int> { -23428, 126970, 116806, 46307 });
+ fill_tensor(cell_gate_bias, std::vector<int> { 128006, 69949, -42808, 42568 });
+ fill_tensor(output_gate_bias, std::vector<int> { -67066, -53607, 47233, 7300 });
+
+ SimpleTensor<uint8_t> expected_output(output_shape, DataType::QASYMM8, 1, qasymm);
+
+ // Initialize state
+ fill_tensor(output_state, std::vector<uint8_t> { 128, 128, 128, 128,
+ 128, 128, 128, 128 });
+ fill_tensor(cell_state, std::vector<int16_t> { 0, 0, 0, 0,
+ 0, 0, 0, 0 });
+
+ // First input
+ fill_tensor(input, std::vector<uint8_t> { 106, 193,
+ 155, 150 });
+
+ fill_tensor(expected_output, std::vector<uint8_t> { 128, 130, 36, 134,
+ 128, 131, 35, 133 });
+
+ lstmq.run();
+ validate(CLAccessor(output_state), expected_output);
+
+ // Second input
+ fill_tensor(expected_output, std::vector<uint8_t> { 128, 129, 12, 137,
+ 128, 131, 10, 136 });
+ lstmq.run();
+ validate(CLAccessor(output_state), expected_output);
+
+ // Third input
+ fill_tensor(expected_output, std::vector<uint8_t> { 128, 129, 8, 140,
+ 128, 130, 6, 138 });
+ lstmq.run();
+ validate(CLAccessor(output_state), expected_output);
+}
+
+TEST_CASE(IntegrationTestCaseLarge, framework::DatasetMode::PRECOMMIT)
+{
+ const int batch_size = 16;
+ const int input_size = 8;
+ const int output_size = 8;
+
+
+ QuantizationInfo qasymm(1.f / 128.f, 128);
+ QuantizationInfo qweights(1.f / 128.f, 128);
+ QuantizationInfo qsymm_3(8.f / 32768.f, 0);
+ QuantizationInfo qsymm_4(16.f / 32768.f, 0);
+
+ TensorShape input_shape{ input_size, batch_size };
+ TensorShape input_weights_shape{ input_size, output_size };
+ TensorShape recurrent_weights_shape{ output_size, output_size };
+ TensorShape output_shape{ output_size, batch_size};
+ TensorShape bias_shape{ output_size };
+
+ auto input_to_input_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_to_forget_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_to_cell_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_to_output_weights = create_tensor<CLTensor>(input_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_input_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_forget_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_cell_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto recurrent_to_output_weights = create_tensor<CLTensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights);
+ auto input_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+ auto forget_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+ auto cell_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+ auto output_gate_bias = create_tensor<CLTensor>(bias_shape, DataType::S32);
+
+ // LSTM input
+ auto input = create_tensor<CLTensor>(input_shape, DataType::QASYMM8, 1, qasymm);
+
+ // LSTM output state
+ auto output_state = create_tensor<CLTensor>(output_shape, DataType::QASYMM8, 1, qasymm);
+
+ // LSTM cell state
+ auto cell_state = create_tensor<CLTensor>(output_shape, DataType::QSYMM16, 1, qsymm_4);
+
+ CLLSTMLayerQuantized lstmq;
+
+ lstmq.configure(&input, &input_to_input_weights, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights,
+ &recurrent_to_input_weights, &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights,
+ &input_gate_bias, &forget_gate_bias, &cell_gate_bias, &output_gate_bias, &cell_state, &output_state, &cell_state, &output_state);
+
+ input.allocator()->allocate();
+ input_to_input_weights.allocator()->allocate();
+ input_to_forget_weights.allocator()->allocate();
+ input_to_cell_weights.allocator()->allocate();
+ input_to_output_weights.allocator()->allocate();
+ recurrent_to_input_weights.allocator()->allocate();
+ recurrent_to_forget_weights.allocator()->allocate();
+ recurrent_to_cell_weights.allocator()->allocate();
+ recurrent_to_output_weights.allocator()->allocate();
+ input_gate_bias.allocator()->allocate();
+ forget_gate_bias.allocator()->allocate();
+ cell_gate_bias.allocator()->allocate();
+ output_gate_bias.allocator()->allocate();
+ cell_state.allocator()->allocate();
+ output_state.allocator()->allocate();
+
+ // Fill weights and biases
+ fill_tensor(input_to_input_weights, std::vector<uint8_t>{ 141, 89, 200, 180, 46, 50, 87, 128,
+ 149, 227, 177, 187, 212, 229, 54, 111,
+ 131, 116, 3, 58, 196, 26, 131, 255,
+ 22, 106, 216, 69, 239, 12, 232, 207,
+ 184, 56, 236, 172, 28, 143, 161, 124,
+ 255, 33, 197, 122, 47, 197, 26, 229,
+ 91, 79, 11, 160, 26, 80, 100, 36,
+ 248, 186, 97, 61, 125, 46, 14, 100, });
+
+ fill_tensor(input_to_forget_weights, std::vector<uint8_t> { 237, 165, 141, 249, 72, 116, 36 , 115,
+ 234, 213, 85, 84, 59, 62, 150, 246,
+ 182, 102, 158, 214, 182, 183, 94, 11,
+ 158, 192, 92, 189, 160, 219, 206, 249,
+ 88, 213, 193, 244, 151, 72, 129, 49,
+ 239, 83, 106, 9, 169, 187, 125, 171,
+ 32, 141, 126, 92, 13, 36, 224, 150,
+ 187, 250, 178, 169, 89, 214, 91, 173 });
+
+ fill_tensor(input_to_cell_weights, std::vector<uint8_t> { 93, 103, 226, 139, 185, 252, 129, 171,
+ 159, 32, 25, 175, 224, 183, 165, 35,
+ 207, 69, 238, 228, 149, 214, 79, 6,
+ 5, 66, 102, 14, 19, 111, 36, 143,
+ 22, 85, 13, 78, 236, 121, 122, 77,
+ 249, 39, 88, 12, 205, 143, 93, 240,
+ 167, 89, 188, 50, 73, 69, 201, 251,
+ 59, 32, 203, 184, 139, 191, 199, 74});
+
+ fill_tensor(input_to_output_weights, std::vector<uint8_t> { 205, 7, 95, 104, 252, 143, 226, 73,
+ 229, 114, 152, 171, 221, 153, 73, 229,
+ 153, 165, 223, 239, 100, 38, 172, 211,
+ 226, 133, 239, 207, 116, 230, 170, 100,
+ 241, 95, 171, 124, 63, 115, 32, 127,
+ 141, 239, 53, 193, 201, 53, 104, 178,
+ 186, 212, 167, 107, 226, 230, 71, 213,
+ 148, 217, 19, 248, 233, 195, 183, 156 });
+
+ fill_tensor(recurrent_to_input_weights, std::vector<uint8_t> { 147, 112, 140, 103, 3, 255, 17, 49,
+ 84, 112, 144, 213, 138, 142, 112, 66,
+ 117, 30, 101, 35, 25, 132, 211, 229,
+ 183, 208, 102, 16, 38, 85, 101, 152,
+ 226, 83, 132, 22, 161, 110, 157, 129,
+ 184, 63, 168, 42, 220, 126, 209, 157,
+ 5, 88, 243, 83, 249, 19, 226, 209,
+ 173, 96, 185, 77, 146, 227, 238, 136 });
+
+
+ fill_tensor(recurrent_to_forget_weights, std::vector<uint8_t> { 52, 132, 92, 200, 213, 32, 213, 37,
+ 116, 142, 116, 180, 4, 172, 158, 143,
+ 110, 40, 99, 28, 221, 153, 133, 2,
+ 247, 144, 198, 100, 20, 15, 221, 196,
+ 159, 178, 188, 151, 171, 15, 25, 217,
+ 178, 109, 110, 118, 128, 39, 232, 234,
+ 184, 214, 177, 13, 56, 6, 28, 252,
+ 89, 187, 242, 59, 146, 111, 132, 129});
+
+ fill_tensor(recurrent_to_cell_weights, std::vector<uint8_t> { 70, 44, 137, 29, 36, 127, 1, 241,
+ 26, 241, 142, 114, 67, 181, 49, 57,
+ 131, 152, 175, 77, 23, 63, 37, 124,
+ 150, 113, 95, 103, 110, 201, 69, 97,
+ 196, 242, 62, 214, 66, 19, 45, 135,
+ 22, 168, 149, 104, 77, 101, 36, 68,
+ 170, 116, 222, 100, 109, 1, 154, 18,
+ 133, 215, 105, 93, 31, 57, 231, 112 });
+
+
+ fill_tensor(recurrent_to_output_weights, std::vector<uint8_t> { 45 , 181 , 220 , 219 , 49 , 63 , 49 , 129,
+ 7 , 166 , 104 , 114 , 83 , 40 , 1 , 195,
+ 245 , 142 , 82 , 232 , 104 , 245 , 82 , 196,
+ 111 , 56 , 156 , 9 , 141 , 240 , 180 , 148,
+ 247 , 198 , 234 , 137 , 13 , 210 , 161 , 192,
+ 196 , 59 , 233 , 184 , 142 , 187 , 140 , 166,
+ 2 , 95 , 152 , 46 , 71 , 46 , 113 , 32,
+ 175 , 229 , 86 , 87 , 62 , 93 , 74 , 130});
+
+ fill_tensor(input_gate_bias, std::vector<int> { -40040, -106916, -92315, -79123, 45160, -17954, 50962, -63758 });
+ fill_tensor(forget_gate_bias, std::vector<int> { -128514, 8463, -57831, 116977, 106547, -28132, -124557, 44941 });
+ fill_tensor(cell_gate_bias, std::vector<int> { 88388 , 123601, -116148, -13022, 21619, 48926, 57523, 39332 });
+ fill_tensor(output_gate_bias, std::vector<int> { 59485 , -33070, 21386, -100633, -115959, 125768, -56407, 24897 });
+
+ SimpleTensor<uint8_t> expected_output(output_shape, DataType::QASYMM8, 1, qasymm);
+
+ // Initialize state
+ fill_tensor(output_state, std::vector<uint8_t> { 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 });
+
+ fill_tensor(cell_state, std::vector<int16_t> { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0});
+
+ // First input
+ fill_tensor(input, std::vector<uint8_t> { 247, 203, 159, 131, 182, 114, 207, 195,
+ 48 , 61 , 154, 16, 80, 101, 116, 255,
+ 50 , 115 , 45, 186, 75, 212, 98, 48,
+ 88 , 146 , 24, 143, 218, 174, 203, 200,
+ 239 , 16 , 66, 136, 234, 54, 94, 51,
+ 101 , 128 , 220, 213, 164, 82, 137, 255,
+ 70 , 165 , 234, 220, 66, 35, 183, 206,
+ 39 , 57 , 180, 202, 23, 172, 224, 109,
+ 102 , 215 , 186, 82, 215, 147, 85, 187,
+ 96 , 249 , 59, 116, 150, 44, 167, 128,
+ 34 , 217 , 148, 193, 243, 38, 250, 208,
+ 112 , 130 , 208, 29, 16, 122, 20, 92,
+ 24 , 72 , 104, 29, 150, 233, 151, 19,
+ 158 , 192 , 254, 70, 73, 142, 106, 152,
+ 3 , 61 , 24, 135, 212, 9, 80, 234,
+ 147 , 246 , 83, 249, 49, 14, 68, 50});
+
+ fill_tensor(expected_output, std::vector<uint8_t> {131, 128, 128, 128, 128, 180, 129, 133,
+ 136, 128, 126, 128, 128, 173, 135, 130,
+ 160, 128, 128, 128, 128, 138, 132, 129,
+ 131, 128, 127, 128, 128, 169, 129, 131,
+ 133, 128, 128, 128, 128, 182, 130, 129,
+ 131, 128, 128, 128, 128, 163, 129, 130,
+ 131, 128, 128, 128, 128, 149, 132, 129,
+ 143, 128, 127, 128, 128, 150, 134, 131,
+ 134, 128, 128, 128, 128, 167, 130, 130,
+ 131, 128, 128, 128, 128, 152, 132, 129,
+ 128, 128, 128, 128, 128, 169, 130, 130,
+ 173, 128, 128, 128, 128, 148, 139, 130,
+ 152, 128, 128, 128, 128, 168, 139, 132,
+ 147, 128, 128, 128, 128, 161, 131, 132,
+ 130, 128, 128, 128, 128, 159, 134, 128,
+ 140, 128, 128, 128, 128, 133, 132, 128 });
+
+ lstmq.run();
+ validate(CLAccessor(output_state), expected_output);
+
+ // Second input
+ fill_tensor(expected_output, std::vector<uint8_t> { 130, 128, 128, 128, 128, 205, 129, 137,
+ 135, 128, 127, 128, 128, 190, 137, 132,
+ 160, 128, 128, 128, 128, 142, 133, 131,
+ 130, 128, 128, 128, 128, 185, 129, 133,
+ 132, 128, 128, 128, 128, 198, 131, 130,
+ 130, 128, 128, 128, 128, 178, 130, 131,
+ 131, 128, 128, 128, 128, 158, 132, 131,
+ 142, 128, 127, 128, 128, 158, 135, 134,
+ 133, 128, 128, 128, 128, 178, 131, 132,
+ 131, 128, 128, 128, 128, 160, 132, 130,
+ 128, 128, 128, 128, 128, 190, 131, 131,
+ 170, 128, 128, 128, 128, 157, 142, 131,
+ 149, 128, 128, 128, 128, 178, 142, 135,
+ 145, 128, 128, 128, 129, 173, 132, 135,
+ 129, 128, 128, 128, 128, 171, 134, 129,
+ 140, 128, 128, 128, 128, 135, 132, 129});
+ lstmq.run();
+ validate(CLAccessor(output_state), expected_output);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE_END() // LSTMLayerQuantized
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
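A note on reading the expected outputs in the new test: with qasymm = QuantizationInfo(1.f / 128.f, 128), a stored QASYMM8 value q corresponds to (q - 128) / 128 in the real domain, so 128 is zero and the gate outputs stay within roughly [-1, 1]. A plain-arithmetic sketch decoding a few of the first expected values (illustration, not library code):

#include <cstdint>
#include <initializer_list>
#include <iostream>

// Asymmetric 8-bit dequantization: real = (q - offset) * scale.
float dequantize_asym8(uint8_t q, float scale, int offset)
{
    return (static_cast<int>(q) - offset) * scale;
}

int main()
{
    const float scale  = 1.f / 128.f;
    const int   offset = 128;
    // The first row of expected outputs from IntegrationTestCaseSmall:
    for(uint8_t q : { uint8_t(128), uint8_t(130), uint8_t(36), uint8_t(134) })
    {
        std::cout << int(q) << " -> " << dequantize_asym8(q, scale, offset) << '\n';
    }
    // 128 -> 0, 130 -> 0.015625, 36 -> -0.71875, 134 -> 0.046875
}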
diff --git a/tests/validation/CL/WidthConcatenateLayer.cpp b/tests/validation/CL/WidthConcatenateLayer.cpp
index 52a4e4ccd6..7b894a63e0 100644
--- a/tests/validation/CL/WidthConcatenateLayer.cpp
+++ b/tests/validation/CL/WidthConcatenateLayer.cpp
@@ -98,9 +98,12 @@ TEST_CASE(Configuration, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
// Create and configure function
- CLConcatenateLayer concat_layer;
-
- concat_layer.configure({ &src1, &src2, &src3 }, &dst, 0);
+ CLConcatenateLayer concat_layer;
+ std::vector<ICLTensor *> inputs;
+ inputs.emplace_back(&src1);
+ inputs.emplace_back(&src2);
+ inputs.emplace_back(&src3);
+ concat_layer.configure(inputs, &dst, 0);
}
template <typename T>
diff --git a/tests/validation/NEON/LSTMLayerQuantized.cpp b/tests/validation/NEON/LSTMLayerQuantized.cpp
index 41c12c91e7..d5d036de33 100644
--- a/tests/validation/NEON/LSTMLayerQuantized.cpp
+++ b/tests/validation/NEON/LSTMLayerQuantized.cpp
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NELSTMLayer.h"
#include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h"
+
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/Utils.h"
@@ -131,8 +131,6 @@ TEST_CASE(IntegrationTestCaseSmall, framework::DatasetMode::PRECOMMIT)
output_gate_bias.allocator()->allocate();
cell_state.allocator()->allocate();
output_state.allocator()->allocate();
- cell_state.allocator()->allocate();
- output_state.allocator()->allocate();
// Fill weights and biases
fill_tensor(input_to_input_weights, std::vector<uint8_t>{ 47, 168,
@@ -452,7 +450,7 @@ TEST_CASE(IntegrationTestCaseLarge, framework::DatasetMode::PRECOMMIT)
// *INDENT-ON*
TEST_SUITE_END() // LSTMLayerQuantized
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index 15f3711189..2c8f05746d 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -92,32 +92,46 @@ protected:
SimpleTensor<T> compute_reference(const TensorShape &shape, DataType src_data_type)
{
- if(is_data_type_quantized_asymmetric(src_data_type))
+ if(src_data_type == DataType::QASYMM8)
{
SimpleTensor<uint8_t> src{ shape, src_data_type, 1, _quantization_info };
fill(src);
return reference::dequantization_layer<T>(src);
}
- else
+ else if(src_data_type == DataType::QSYMM8)
{
SimpleTensor<int8_t> src{ shape, src_data_type, 1, _quantization_info };
fill(src);
return reference::dequantization_layer<T>(src);
}
+ else if(src_data_type == DataType::QSYMM16)
+ {
+ SimpleTensor<int16_t> src{ shape, src_data_type, 1, _quantization_info };
+ fill(src);
+ return reference::dequantization_layer<T>(src);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported data type");
+ }
}
protected:
QuantizationInfo generate_quantization_info(DataType data_type)
{
- std::uniform_int_distribution<> distribution(1, 127);
std::mt19937 gen(library.get()->seed());
+ std::uniform_int_distribution<> distribution_scale_q8(1, 255);
+ std::uniform_int_distribution<> distribution_offset_q8(1, 127);
+ std::uniform_int_distribution<> distribution_scale_q16(1, 32768);
switch(data_type)
{
+ case DataType::QSYMM16:
+ return QuantizationInfo(1.f / distribution_scale_q16(gen));
case DataType::QSYMM8:
- return QuantizationInfo(1.f / distribution(gen));
+ return QuantizationInfo(1.f / distribution_scale_q8(gen));
case DataType::QASYMM8:
- return QuantizationInfo(1.f / distribution(gen), distribution(gen));
+ return QuantizationInfo(1.f / distribution_scale_q8(gen), distribution_offset_q8(gen));
default:
ARM_COMPUTE_ERROR("Unsupported data type");
}
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index d07371c883..cceee0421c 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -45,6 +45,11 @@ TOut dequantize(uint8_t val, const UniformQuantizationInfo qinfo)
{
return static_cast<TOut>(dequantize_qasymm8(val, qinfo));
}
+template <typename TOut>
+TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo)
+{
+ return static_cast<TOut>(dequantize_qsymm16(val, qinfo));
+}
template <typename TOut, typename TIn>
SimpleTensor<TOut> dequantization_layer_nchw(const SimpleTensor<TIn> &src)
@@ -72,7 +77,7 @@ SimpleTensor<TOut> dequantization_layer_nchw(const SimpleTensor<TIn> &src)
// Dequantize slice
for(int s = 0; s < WH; ++s)
{
- dst[idx + s] = dequantize<TOut>(src[idx + s], channel_qinfo);
+ dst[idx + s] = dequantize<TOut>(static_cast<TIn>(src[idx + s]), channel_qinfo);
}
}
}
@@ -84,7 +89,7 @@ SimpleTensor<TOut> dequantization_layer_nchw(const SimpleTensor<TIn> &src)
for(int i = 0; i < src.num_elements(); ++i)
{
- dst[i] = static_cast<TOut>(dequantize<TOut>(src[i], quantization_info));
+ dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info));
}
}
@@ -109,6 +114,8 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<uint8_t> &sr
template SimpleTensor<float> dequantization_layer(const SimpleTensor<uint8_t> &src);
template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src);
template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src);
+template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src);
} // namespace reference
} // namespace validation
} // namespace test
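With the int16_t overload and the new template instantiations in place, the reference dequantization can be driven directly from QSYMM16 SimpleTensors. A hedged usage sketch (the shape and scale are illustrative values, not taken from an existing test):

#include "arm_compute/core/Types.h"
#include "tests/validation/reference/DequantizationLayer.h"

namespace arm_compute
{
namespace test
{
namespace validation
{
// Builds a QSYMM16 source tensor and runs the reference dequantization on it.
SimpleTensor<float> dequantize_qsymm16_reference(const TensorShape &shape)
{
    SimpleTensor<int16_t> src{ shape, DataType::QSYMM16, 1, QuantizationInfo(16.f / 32768.f) };
    // ...fill src with quantized values...
    return reference::dequantization_layer<float>(src);
}
} // namespace validation
} // namespace test
} // namespace arm_compute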