aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2020-06-10 16:34:50 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com> 2020-06-12 09:26:10 +0000
commit 11c562c94fa6a0399aff798bfd970ed8c1942730 (patch)
tree 6eb2ca628659b1b579a2f99a6dd598d5e6ee5ea2 /src
parent d004a7a707feab36e51f51cfc9eb2cb70729d5ad (diff)
download ComputeLibrary-11c562c94fa6a0399aff798bfd970ed8c1942730.tar.gz
COMPMID-3527: QLSTM should allow nullptr for optional projection bias
Change-Id: I5568ca1e9383f3960886f211d8320fc4525a8804
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3326
Reviewed-by: James Conroy <james.conroy@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/CL/kernels/CLElementwiseOperationKernel.cpp 12
-rw-r--r-- src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp 15
-rw-r--r-- src/runtime/CL/functions/CLQLSTMLayer.cpp 16
-rw-r--r-- src/runtime/NEON/functions/NEQLSTMLayer.cpp 15
4 files changed, 47 insertions, 11 deletions
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index 00a97d50e9..4e7d3b3753 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -93,9 +93,13 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &inp
Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::S32, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::S32, DataType::F32);
const bool is_quantized = is_data_type_quantized(input1.data_type()) || is_data_type_quantized(input2.data_type());
if(is_quantized)
@@ -119,7 +123,9 @@ Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const
if(output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::S32, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((output.data_type() == DataType::U8) && ((input1.data_type() != DataType::U8) || (input2.data_type() != DataType::U8)),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index f8ee578ef8..3878c764a6 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -815,8 +815,12 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::S32, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::S32, DataType::F32);
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
@@ -834,6 +838,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
+ && !(input1.data_type() == DataType::S32 && input2.data_type() == DataType::S32 && output.data_type() == DataType::S32)
&& !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32 && output.data_type() == DataType::F32)
&& !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16 && output.data_type() == DataType::F16)
&& !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
@@ -862,6 +867,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITe
{
set_format_if_unknown(output, Format::S16);
}
+ if(input1.data_type() == DataType::S32 || input2.data_type() == DataType::S32)
+ {
+ set_format_if_unknown(output, Format::S32);
+ }
else if(input1.data_type() == DataType::F16 || input2.data_type() == DataType::F16)
{
set_format_if_unknown(output, Format::F16);
@@ -926,6 +935,8 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
{ "add_saturate_U8_U8_S16", &add_U8_U8_S16 },
{ "add_wrap_S16_S16_S16", &add_same<int16_t> },
{ "add_saturate_S16_S16_S16", &add_same<int16_t> },
+ { "add_wrap_S32_S32_S32", &add_same<int32_t> },
+ { "add_saturate_S32_S32_S32", &add_same<int32_t> },
{ "add_wrap_F32_F32_F32", &add_same<float> },
{ "add_saturate_F32_F32_F32", &add_same<float> },
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 524c7b3aae..f063410972 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -211,6 +211,10 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT
if(_has_projection)
{
_projection_reduction.configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ if(_projection_bias != nullptr)
+ {
+ _projection_bias_add.configure(compile_context, ArithmeticOperation::ADD, _projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
+ }
}
// Pre-transpose weights to be used in GEMM.
@@ -640,6 +644,12 @@ Status CLQLSTMLayer::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info, GEMMLowpReductionKernelInfo(output_size, false,
lstm_params.hidden_state_zero(),
true)));
+ if(lstm_params.projection_bias() != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, lstm_params.projection_bias(), &projection_eff_bias_info,
+ &projection_eff_bias_info, ConvertPolicy::SATURATE));
+ }
}
const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
@@ -832,7 +842,6 @@ Status CLQLSTMLayer::validate(const ITensorInfo *input,
if(lstm_params.has_projection())
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.projection_bias());
ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
@@ -1095,10 +1104,11 @@ void CLQLSTMLayer::prepare()
if(_has_projection)
{
+ _projection_eff_bias.allocator()->allocate();
+ CLScheduler::get().enqueue(_projection_reduction);
if(_projection_bias != nullptr)
{
- _projection_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_projection_reduction);
+ CLScheduler::get().enqueue(_projection_bias_add);
_projection_bias->mark_as_unused();
}
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index 083e3fddb4..a22c669ca7 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -189,6 +189,10 @@ void NEQLSTMLayer::configure(const ITensor *input,
if(_has_projection)
{
_projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ if(_projection_bias != nullptr)
+ {
+ _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
+ }
}
// Pre-transpose weights to be used in GEMM.
@@ -612,6 +616,11 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info, GEMMLowpReductionKernelInfo(output_size, false,
lstm_params.hidden_state_zero(),
true)));
+ if(lstm_params.projection_bias() != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(lstm_params.projection_bias(), &projection_eff_bias_info, &projection_eff_bias_info, ConvertPolicy::SATURATE));
+ }
}
const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
@@ -804,7 +813,6 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
if(lstm_params.has_projection())
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.projection_bias());
ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
@@ -1065,10 +1073,11 @@ void NEQLSTMLayer::prepare()
if(_has_projection)
{
+ _projection_eff_bias.allocator()->allocate();
+ NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
if(_projection_bias != nullptr)
{
- _projection_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
+ NEScheduler::get().schedule(&_projection_bias_add, Window::DimY);
_projection_bias->mark_as_unused();
}