aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2020-06-23 17:25:43 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-06-25 13:14:06 +0000
commit173ba9bbb19ea83f951318d9989e440768b4de8f (patch)
tree840a28e1cc4d0adf47097c8ab27092531c8e0958 /src
parent0f954eb6c8bf2f6c8600c56f21fec6aa9ebf082e (diff)
downloadComputeLibrary-173ba9bbb19ea83f951318d9989e440768b4de8f.tar.gz
COMPMID-3373: Async support to NEArithmetic* kernels/functions (Pt. 1)
Added support on NEArithmeticAddition and NEArithmeticSubtraction Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Change-Id: Ifa805f8455ef6eff1ee627752dc1c7fe9740ec47 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3451 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp26
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp44
-rw-r--r--src/runtime/NEON/functions/NEArithmeticAddition.cpp46
-rw-r--r--src/runtime/NEON/functions/NEArithmeticSubtraction.cpp47
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp8
-rw-r--r--src/runtime/NEON/functions/NELSTMLayer.cpp16
-rw-r--r--src/runtime/NEON/functions/NEQLSTMLayer.cpp45
-rw-r--r--src/runtime/NEON/functions/NERNNLayer.cpp8
8 files changed, 161 insertions, 79 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index 3878c764a6..1459f7f250 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -853,7 +853,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
+std::pair<Status, Window> validate_and_configure_window(const ITensorInfo &input1, const ITensorInfo &input2, ITensorInfo &output)
{
const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2);
const TensorShape &out_shape = broadcast_pair.first;
@@ -904,17 +904,17 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITe
} // namespace
NEArithmeticAdditionKernel::NEArithmeticAdditionKernel()
- : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _policy()
+ : _func(nullptr), _policy()
{
}
-void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
+void NEArithmeticAdditionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), policy));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output, policy));
// Configure kernel window
- auto win_config = validate_and_configure_window(*input1->info(), *input2->info(), *output->info());
+ auto win_config = validate_and_configure_window(*input1, *input2, *output);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
static std::map<std::string, AddFunction *> map_function =
@@ -945,16 +945,13 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
};
- _input1 = input1;
- _input2 = input2;
- _output = output;
_policy = policy;
std::string function_to_call("add_");
function_to_call += policy == ConvertPolicy::WRAP ? "wrap_" : "saturate_";
- function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
- function_to_call += string_from_data_type(input2->info()->data_type()) + "_";
- function_to_call += string_from_data_type(output->info()->data_type());
+ function_to_call += string_from_data_type(input1->data_type()) + "_";
+ function_to_call += string_from_data_type(input2->data_type()) + "_";
+ function_to_call += string_from_data_type(output->data_type());
auto it = map_function.find(function_to_call);
@@ -976,13 +973,12 @@ Status NEArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITe
return Status{};
}
-void NEArithmeticAdditionKernel::run(const Window &window, const ThreadInfo &info)
+void NEArithmeticAdditionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
- (*_func)(_input1, _input2, _output, _policy, window);
+ // Dispatch kernel
+ (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), _policy, window);
}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 2b3fce3fea..2097d761a7 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -719,35 +719,32 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
} // namespace
NEArithmeticSubtractionKernel::NEArithmeticSubtractionKernel()
- : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _policy(ConvertPolicy::WRAP)
+ : _func(nullptr), _policy(ConvertPolicy::WRAP)
{
}
-void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
+void NEArithmeticSubtractionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), policy));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output, policy));
- _input1 = input1;
- _input2 = input2;
- _output = output;
_policy = policy;
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
+ const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
const TensorShape &out_shape = broadcast_pair.first;
const ValidRegion &valid_region = broadcast_pair.second;
// Auto initialize output if not initialized
- set_shape_if_empty(*output->info(), out_shape);
+ set_shape_if_empty(*output, out_shape);
- switch(input1->info()->data_type())
+ switch(input1->data_type())
{
case DataType::U8:
- if(input2->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::U8)
+ if(input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
{
_func = &sub_same<uint8_t>;
}
- else if(input2->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::S16)
+ else if(input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
{
_func = &sub_U8_U8_S16;
}
@@ -758,14 +755,14 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
break;
case DataType::QASYMM8:
_func = &sub_quantized<uint8_t>;
- set_data_type_if_unknown(*output->info(), DataType::QASYMM8);
+ set_data_type_if_unknown(*output, DataType::QASYMM8);
break;
case DataType::QASYMM8_SIGNED:
_func = &sub_quantized<int8_t>;
- set_data_type_if_unknown(*output->info(), DataType::QASYMM8_SIGNED);
+ set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED);
break;
case DataType::S16:
- if(input2->info()->data_type() == DataType::U8)
+ if(input2->data_type() == DataType::U8)
{
_func = &sub_S16_U8_S16;
}
@@ -773,21 +770,21 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
{
_func = &sub_same<int16_t>;
}
- set_format_if_unknown(*output->info(), Format::S16);
+ set_format_if_unknown(*output, Format::S16);
break;
case DataType::QSYMM16:
_func = &sub_QSYMM16_QSYMM16_QSYMM16;
- set_data_type_if_unknown(*output->info(), DataType::QSYMM16);
+ set_data_type_if_unknown(*output, DataType::QSYMM16);
break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = &sub_same<float16_t>;
- set_format_if_unknown(*output->info(), Format::F16);
+ set_format_if_unknown(*output, Format::F16);
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::F32:
_func = &sub_same<float>;
- set_format_if_unknown(*output->info(), Format::F32);
+ set_format_if_unknown(*output, Format::F32);
break;
default:
_func = nullptr;
@@ -795,8 +792,8 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
// NEArithmeticSubtractionKernel doesn't need padding so update_window_and_padding() can be skipped
Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(valid_region);
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(valid_region);
Window win = calculate_max_window(valid_region, Steps());
INEKernel::configure(win);
@@ -810,13 +807,12 @@ Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const
return Status{};
}
-void NEArithmeticSubtractionKernel::run(const Window &window, const ThreadInfo &info)
+void NEArithmeticSubtractionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
- (*_func)(_input1, _input2, _output, window, (_policy == ConvertPolicy::SATURATE));
+ // Dispatch kernel
+ (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), window, (_policy == ConvertPolicy::SATURATE));
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp
index 06c71db1bd..3a2848c3a7 100644
--- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp
+++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp
@@ -31,7 +31,9 @@
namespace arm_compute
{
-void NEArithmeticAddition::configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+namespace experimental
+{
+void NEArithmeticAddition::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
auto k = arm_compute::support::cpp14::make_unique<NEArithmeticAdditionKernel>();
@@ -43,4 +45,46 @@ Status NEArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn
ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
return NEArithmeticAdditionKernel::validate(input1, input2, output, policy);
}
+MemoryRequirements NEArithmeticAddition::workspace() const
+{
+ return MemoryRequirements{};
+}
+} // namespace experimental
+
+struct NEArithmeticAddition::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<experimental::NEArithmeticAddition> op{ nullptr };
+};
+
+NEArithmeticAddition::NEArithmeticAddition()
+ : _impl(support::cpp14::make_unique<Impl>())
+{
+}
+NEArithmeticAddition::NEArithmeticAddition(NEArithmeticAddition &&) = default;
+NEArithmeticAddition &NEArithmeticAddition::operator=(NEArithmeticAddition &&) = default;
+NEArithmeticAddition::~NEArithmeticAddition() = default;
+
+Status NEArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+{
+ return experimental::NEArithmeticAddition::validate(input1, input2, output, policy, act_info);
+}
+
+void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+{
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEArithmeticAddition>();
+ _impl->op->configure(input1->info(), input2->info(), output->info(), policy, act_info);
+}
+
+void NEArithmeticAddition::run()
+{
+ const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } };
+ const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } };
+ _impl->op->run(src, dst, {});
+}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
index 20f930a286..043250ca68 100644
--- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
+++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
@@ -31,7 +31,9 @@
namespace arm_compute
{
-void NEArithmeticSubtraction::configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+namespace experimental
+{
+void NEArithmeticSubtraction::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
auto k = arm_compute::support::cpp14::make_unique<NEArithmeticSubtractionKernel>();
@@ -44,4 +46,47 @@ Status NEArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso
ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
return NEArithmeticSubtractionKernel::validate(input1, input2, output, policy);
}
+
+MemoryRequirements NEArithmeticSubtraction::workspace() const
+{
+ return MemoryRequirements{};
+}
+} // namespace experimental
+
+struct NEArithmeticSubtraction::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<experimental::NEArithmeticSubtraction> op{ nullptr };
+};
+
+NEArithmeticSubtraction::NEArithmeticSubtraction()
+ : _impl(support::cpp14::make_unique<Impl>())
+{
+}
+NEArithmeticSubtraction::NEArithmeticSubtraction(NEArithmeticSubtraction &&) = default;
+NEArithmeticSubtraction &NEArithmeticSubtraction::operator=(NEArithmeticSubtraction &&) = default;
+NEArithmeticSubtraction::~NEArithmeticSubtraction() = default;
+
+Status NEArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+{
+ return experimental::NEArithmeticSubtraction::validate(input1, input2, output, policy, act_info);
+}
+
+void NEArithmeticSubtraction::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+{
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEArithmeticSubtraction>();
+ _impl->op->configure(input1->info(), input2->info(), output->info(), policy, act_info);
+}
+
+void NEArithmeticSubtraction::run()
+{
+ const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } };
+ const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } };
+ _impl->op->run(src, dst, {});
+}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 89dd4a15d0..5fc12e585a 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -43,7 +43,7 @@ namespace arm_compute
{
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(),
- _alpha_scale_func(nullptr), _add_bias_kernel(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
+ _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
_run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
{
}
@@ -141,7 +141,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
if(_run_bias_addition)
{
- _add_bias_kernel.configure(gemm_output_to_use, c, d, ConvertPolicy::SATURATE);
+ _add_bias.configure(gemm_output_to_use, c, d, ConvertPolicy::SATURATE);
_tmp_d.allocator()->allocate();
}
}
@@ -258,7 +258,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
if(c != nullptr && gemm_info.reshape_b_only_on_first_run())
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&tmp_output_info, c, output, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&tmp_output_info, c, output, ConvertPolicy::SATURATE));
}
}
@@ -311,7 +311,7 @@ void NEGEMM::run()
// Run bias addition kernel
if(_run_bias_addition)
{
- NEScheduler::get().schedule(&_add_bias_kernel, Window::DimY);
+ _add_bias.run();
}
}
diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp
index 0a111363e3..d8c684bf15 100644
--- a/src/runtime/NEON/functions/NELSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayer.cpp
@@ -512,7 +512,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticSubtractionKernel::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticSubtraction::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
}
// Validate cell state
@@ -610,7 +610,7 @@ void NELSTMLayer::run()
{
_mean_std_norm_forget_gate.run();
NEScheduler::get().schedule(&_pixelwise_mul_forget_gate_coeff, Window::DimY);
- NEScheduler::get().schedule(&_accum_forget_gate_bias, Window::DimY);
+ _accum_forget_gate_bias.run();
}
_activation_forget_gate.run();
@@ -624,7 +624,7 @@ void NELSTMLayer::run()
{
std::fill_n(reinterpret_cast<float *>(_ones.buffer()), _ones.info()->total_size() / _ones.info()->element_size(), 1);
}
- NEScheduler::get().schedule(&_subtract_input_gate, Window::DimY);
+ _subtract_input_gate.run();
}
else
{
@@ -640,7 +640,7 @@ void NELSTMLayer::run()
{
_mean_std_norm_input_gate.run();
NEScheduler::get().schedule(&_pixelwise_mul_input_gate_coeff, Window::DimY);
- NEScheduler::get().schedule(&_accum_input_gate_bias, Window::DimY);
+ _accum_input_gate_bias.run();
}
_activation_input_gate.run();
}
@@ -648,17 +648,17 @@ void NELSTMLayer::run()
_fully_connected_cell_state.run();
NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY);
_gemm_cell_state1.run();
- NEScheduler::get().schedule(&_accum_cell_state1, Window::DimY);
+ _accum_cell_state1.run();
if(_is_layer_norm_lstm)
{
_mean_std_norm_cell_gate.run();
NEScheduler::get().schedule(&_pixelwise_mul_cell_gate_coeff, Window::DimY);
- NEScheduler::get().schedule(&_accum_cell_gate_bias, Window::DimY);
+ _accum_cell_gate_bias.run();
}
_activation_cell_state.run();
NEScheduler::get().schedule(&_pixelwise_mul_cell_state1, Window::DimY);
NEScheduler::get().schedule(&_pixelwise_mul_cell_state2, Window::DimY);
- NEScheduler::get().schedule(&_accum_cell_state2, Window::DimY);
+ _accum_cell_state2.run();
if(_perform_cell_clipping)
{
@@ -675,7 +675,7 @@ void NELSTMLayer::run()
{
_mean_std_norm_output_gate.run();
NEScheduler::get().schedule(&_pixelwise_mul_output_gate_coeff, Window::DimY);
- NEScheduler::get().schedule(&_accum_output_gate_bias, Window::DimY);
+ _accum_output_gate_bias.run();
}
_activation_output.run();
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index a22c669ca7..6eb1844a1f 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -619,7 +619,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
if(lstm_params.projection_bias() != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(lstm_params.projection_bias(), &projection_eff_bias_info, &projection_eff_bias_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(lstm_params.projection_bias(), &projection_eff_bias_info, &projection_eff_bias_info, ConvertPolicy::SATURATE));
}
}
@@ -662,7 +662,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
const float recurrent_to_forget_scale = recurrent_to_forget_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
if(lstm_params.has_peephole_opt())
{
@@ -672,7 +672,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&mm_out_info, nullptr, &forget_outstage_info, gemmlowp_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
}
if(has_layer_norm)
@@ -697,7 +697,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
const float recurrent_to_cell_scale = recurrent_to_cell_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&cell_outstage_info, &cell_outstage_info, &cell_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_outstage_info, &cell_outstage_info, &cell_outstage_info, ConvertPolicy::SATURATE));
if(has_layer_norm)
{
@@ -714,7 +714,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
if(lstm_params.has_cifg_opt())
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lstm_params.input_gate_bias() != nullptr, "Input gate bias must not be present when CIFG is used");
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticSubtractionKernel::validate(&input_gate_info, &forget_gate_info, &forget_gate_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticSubtraction::validate(&input_gate_info, &forget_gate_info, &forget_gate_info, ConvertPolicy::SATURATE));
}
else
{
@@ -733,7 +733,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
const float recurrent_to_input_scale = lstm_params.recurrent_to_input_weights()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
if(lstm_params.has_peephole_opt())
{
@@ -742,7 +742,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&mm_out_info, &eff_bias_info, &input_outstage_info, gemmlowp_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
}
if(has_layer_norm)
@@ -757,7 +757,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
// Cell.
ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&forget_gate_info, cell_state_in, &forget_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&input_gate_info, cell_state_in, &cell_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&forget_gate_info, &cell_gate_info, cell_state_out, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_gate_info, &cell_gate_info, cell_state_out, ConvertPolicy::SATURATE));
if(quantized_cell_clip > 0)
{
ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip,
@@ -772,7 +772,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
const float recurrent_to_output_scale = recurrent_to_output_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
if(lstm_params.has_peephole_opt())
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_output_weights(), 1, DataType::QSYMM16);
@@ -782,7 +782,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
// ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(cell_state_out, lstm_params.cell_to_output_weights(), &output_outstage_info, 1.f, ConvertPolicy::SATURATE,
RoundingPolicy::TO_ZERO));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
}
if(has_layer_norm)
@@ -837,7 +837,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(*output_state_out, projection_outstage_info));
}
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));
if(projection_tensor_copy_required)
{
@@ -893,13 +893,13 @@ void NEQLSTMLayer::run()
_mm_recurrent_to_forget.run();
_recurrent_to_forget_outstage.run();
- NEScheduler::get().schedule(&_accumulate_input_recurrent_forget, Window::DimY);
+ _accumulate_input_recurrent_forget.run();
if(_has_peephole)
{
NEScheduler::get().schedule(&_pixelwise_mul_cell_to_forget, Window::DimY);
_cell_to_forget_outstage.run();
- NEScheduler::get().schedule(&_accumulate_cell_forget, Window::DimY);
+ _accumulate_cell_forget.run();
}
if(_has_layer_norm)
@@ -915,7 +915,7 @@ void NEQLSTMLayer::run()
_mm_recurrent_to_cell.run();
_recurrent_to_cell_outstage.run();
- NEScheduler::get().schedule(&_accumulate_input_recurrent_modulation, Window::DimY);
+ _accumulate_input_recurrent_modulation.run();
if(_has_layer_norm)
{
@@ -927,7 +927,7 @@ void NEQLSTMLayer::run()
// Input gate
if(_has_cifg)
{
- NEScheduler::get().schedule(&_input_gate_sub, Window::DimY);
+ _input_gate_sub.run();
}
else
{
@@ -935,13 +935,13 @@ void NEQLSTMLayer::run()
_input_to_input_outstage.run();
_mm_recurrent_to_input.run();
_recurrent_to_input_outstage.run();
- NEScheduler::get().schedule(&_accumulate_input_recurrent_input, Window::DimY);
+ _accumulate_input_recurrent_input.run();
if(_has_peephole)
{
NEScheduler::get().schedule(&_pixelwise_mul_cell_to_input, Window::DimY);
_cell_to_input_outstage.run();
- NEScheduler::get().schedule(&_accumulate_cell_input, Window::DimY);
+ _accumulate_cell_input.run();
}
if(_has_layer_norm)
@@ -955,7 +955,8 @@ void NEQLSTMLayer::run()
// Cell.
NEScheduler::get().schedule(&_pixelwise_mul_forget_cell, Window::DimY);
NEScheduler::get().schedule(&_pixelwise_mul_input_cell, Window::DimY);
- NEScheduler::get().schedule(&_add_forget_cell, Window::DimY);
+ _add_forget_cell.run();
+
if(_has_cell_clipping)
{
_cell_clip.run();
@@ -966,12 +967,12 @@ void NEQLSTMLayer::run()
_input_to_output_outstage.run();
_mm_recurrent_to_output.run();
_recurrent_to_output_outstage.run();
- NEScheduler::get().schedule(&_accumulate_input_recurrent_output, Window::DimY);
+ _accumulate_input_recurrent_output.run();
if(_has_peephole)
{
NEScheduler::get().schedule(&_pixelwise_mul_cell_to_output, Window::DimY);
_cell_to_output_outstage.run();
- NEScheduler::get().schedule(&_accumulate_cell_to_output, Window::DimY);
+ _accumulate_cell_to_output.run();
}
if(_has_layer_norm)
@@ -997,7 +998,7 @@ void NEQLSTMLayer::run()
_projection_output_to_accumulate_copy.run();
}
- NEScheduler::get().schedule(&_accumulate_projection, Window::DimY);
+ _accumulate_projection.run();
if(_projection_tensor_copy_required)
{
@@ -1077,7 +1078,7 @@ void NEQLSTMLayer::prepare()
NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
if(_projection_bias != nullptr)
{
- NEScheduler::get().schedule(&_projection_bias_add, Window::DimY);
+ _projection_bias_add.run();
_projection_bias->mark_as_unused();
}
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index 4a15777be9..19b84e7fb8 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -34,7 +34,7 @@
namespace arm_compute
{
NERNNLayer::NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
+ : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_f(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
_is_prepared(false)
{
}
@@ -59,7 +59,7 @@ Status NERNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights
auto shape_info = TensorInfo(misc::shape_calculator::compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1, input->data_type());
ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&shape_info, &shape_info, info));
return Status{};
@@ -90,7 +90,7 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I
_add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
_memory_group.manage(&_add_output);
- _add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
+ _add_f.configure(&_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
_fully_connected_out.allocator()->allocate();
_gemm_output.allocator()->allocate();
@@ -111,7 +111,7 @@ void NERNNLayer::run()
_gemm_state_f.run();
- NEScheduler::get().schedule(&_add_kernel, Window::DimY);
+ _add_f.run();
_activation.run();
// copy hidden out to output