From 173ba9bbb19ea83f951318d9989e440768b4de8f Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 23 Jun 2020 17:25:43 +0100 Subject: COMPMID-3373: Async support to NEArithmetic* kernels/functions (Pt. 1) Added support on NEArithmeticAddition and NEArithmeticSubtraction Signed-off-by: Michalis Spyrou Change-Id: Ifa805f8455ef6eff1ee627752dc1c7fe9740ec47 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3451 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 26 ++++++------- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 44 ++++++++++------------ 2 files changed, 31 insertions(+), 39 deletions(-) (limited to 'src/core/NEON') diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 3878c764a6..1459f7f250 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -853,7 +853,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, return Status{}; } -std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) +std::pair validate_and_configure_window(const ITensorInfo &input1, const ITensorInfo &input2, ITensorInfo &output) { const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2); const TensorShape &out_shape = broadcast_pair.first; @@ -904,17 +904,17 @@ std::pair validate_and_configure_window(ITensorInfo &input1, ITe } // namespace NEArithmeticAdditionKernel::NEArithmeticAdditionKernel() - : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _policy() + : _func(nullptr), _policy() { } -void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy) +void NEArithmeticAdditionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), policy)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output, policy)); // Configure kernel window - auto win_config = validate_and_configure_window(*input1->info(), *input2->info(), *output->info()); + auto win_config = validate_and_configure_window(*input1, *input2, *output); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); static std::map map_function = @@ -945,16 +945,13 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ }; - _input1 = input1; - _input2 = input2; - _output = output; _policy = policy; std::string function_to_call("add_"); function_to_call += policy == ConvertPolicy::WRAP ? "wrap_" : "saturate_"; - function_to_call += string_from_data_type(input1->info()->data_type()) + "_"; - function_to_call += string_from_data_type(input2->info()->data_type()) + "_"; - function_to_call += string_from_data_type(output->info()->data_type()); + function_to_call += string_from_data_type(input1->data_type()) + "_"; + function_to_call += string_from_data_type(input2->data_type()) + "_"; + function_to_call += string_from_data_type(output->data_type()); auto it = map_function.find(function_to_call); @@ -976,13 +973,12 @@ Status NEArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITe return Status{}; } -void NEArithmeticAdditionKernel::run(const Window &window, const ThreadInfo &info) +void NEArithmeticAdditionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (*_func)(_input1, _input2, _output, _policy, window); + // Dispatch kernel + (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), _policy, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 2b3fce3fea..2097d761a7 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -719,35 +719,32 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i } // namespace NEArithmeticSubtractionKernel::NEArithmeticSubtractionKernel() - : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _policy(ConvertPolicy::WRAP) + : _func(nullptr), _policy(ConvertPolicy::WRAP) { } -void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy) +void NEArithmeticSubtractionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), policy)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output, policy)); - _input1 = input1; - _input2 = input2; - _output = output; _policy = policy; - const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); + const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); const TensorShape &out_shape = broadcast_pair.first; const ValidRegion &valid_region = broadcast_pair.second; // Auto initialize output if not initialized - set_shape_if_empty(*output->info(), out_shape); + set_shape_if_empty(*output, out_shape); - switch(input1->info()->data_type()) + switch(input1->data_type()) { case DataType::U8: - if(input2->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::U8) + if(input2->data_type() == DataType::U8 && output->data_type() == DataType::U8) { _func = &sub_same; } - else if(input2->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::S16) + else if(input2->data_type() == DataType::U8 && output->data_type() == DataType::S16) { _func = &sub_U8_U8_S16; } @@ -758,14 +755,14 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens break; case DataType::QASYMM8: _func = &sub_quantized; - set_data_type_if_unknown(*output->info(), DataType::QASYMM8); + set_data_type_if_unknown(*output, DataType::QASYMM8); break; case DataType::QASYMM8_SIGNED: _func = &sub_quantized; - set_data_type_if_unknown(*output->info(), DataType::QASYMM8_SIGNED); + set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED); break; case DataType::S16: - if(input2->info()->data_type() == DataType::U8) + if(input2->data_type() == DataType::U8) { _func = &sub_S16_U8_S16; } @@ -773,21 +770,21 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens { _func = &sub_same; } - set_format_if_unknown(*output->info(), Format::S16); + set_format_if_unknown(*output, Format::S16); break; case DataType::QSYMM16: _func = &sub_QSYMM16_QSYMM16_QSYMM16; - set_data_type_if_unknown(*output->info(), DataType::QSYMM16); + set_data_type_if_unknown(*output, DataType::QSYMM16); break; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: _func = &sub_same; - set_format_if_unknown(*output->info(), Format::F16); + set_format_if_unknown(*output, Format::F16); break; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: _func = &sub_same; - set_format_if_unknown(*output->info(), Format::F32); + set_format_if_unknown(*output, Format::F32); break; default: _func = nullptr; @@ -795,8 +792,8 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens // NEArithmeticSubtractionKernel doesn't need padding so update_window_and_padding() can be skipped Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(valid_region); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(valid_region); Window win = calculate_max_window(valid_region, Steps()); INEKernel::configure(win); @@ -810,13 +807,12 @@ Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const return Status{}; } -void NEArithmeticSubtractionKernel::run(const Window &window, const ThreadInfo &info) +void NEArithmeticSubtractionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (*_func)(_input1, _input2, _output, window, (_policy == ConvertPolicy::SATURATE)); + // Dispatch kernel + (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), window, (_policy == ConvertPolicy::SATURATE)); } } // namespace arm_compute -- cgit v1.2.1