From 668ccdcfb81bfab3a2d44cd1ddd956e83a2dfb09 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Wed, 3 Feb 2021 10:32:59 +0000 Subject: Add dynamic tensor support to CpuElementwise The kernels and operators for binary and unary operations are now capable of being configured with dynamic shapes and computing windows at run-time. Additionally, changing arguments' names is done for consistency. Partially Implements: COMPMID-4127 Change-Id: I48e5038692db667dec7cb2b2906fe5683214fe19 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4973 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins --- arm_compute/core/IKernel.h | 7 +- arm_compute/core/ITensorInfo.h | 24 ++- arm_compute/core/TensorInfo.h | 2 +- arm_compute/runtime/NEON/INEOperator.h | 4 + src/core/IKernel.cpp | 7 +- src/core/Validate.cpp | 4 +- src/core/cpu/kernels/CpuElementwiseKernel.cpp | 164 ++++++++++----------- src/core/cpu/kernels/CpuElementwiseKernel.h | 122 +++++++-------- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 40 +++-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.h | 18 +-- src/core/helpers/WindowHelpers.h | 16 ++ src/runtime/NEON/INEOperator.cpp | 7 +- src/runtime/cpu/operators/CpuElementwise.cpp | 72 +++++---- src/runtime/cpu/operators/CpuElementwise.h | 150 ++++++++++--------- src/runtime/cpu/operators/CpuElementwiseUnary.cpp | 13 ++ src/runtime/cpu/operators/CpuElementwiseUnary.h | 3 + tests/Utils.h | 53 +++++++ tests/validation/NEON/ElementwiseDivision.cpp | 28 ++++ tests/validation/NEON/ElementwiseRsqrtLayer.cpp | 19 +++ .../validation/fixtures/ElementWiseUnaryFixture.h | 35 ++++- .../fixtures/ElementwiseOperationsFixture.h | 51 ++++++- 21 files changed, 550 insertions(+), 289 deletions(-) diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h index 11132f20a9..98fd18cc91 100644 --- a/arm_compute/core/IKernel.h +++ b/arm_compute/core/IKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2016-2019 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,11 @@ public: * @return The maximum window the kernel can be executed on. */ const Window &window() const; + /** Function to check if the embedded window of this kernel has been configured + * + * @return True if the window has been configured + */ + bool is_window_configured() const; protected: /** Configure the kernel's window diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h index 9ddafce7c0..0171e31086 100644 --- a/arm_compute/core/ITensorInfo.h +++ b/arm_compute/core/ITensorInfo.h @@ -41,8 +41,24 @@ class ITensorInfo : public misc::ICloneable { public: using TensorDimsState = Coordinates; - -public: + /** Get the value representing dynamic dimension state + * + * @return Value representing dynamic dimension state + * + */ + static constexpr int32_t get_dynamic_state_value() + { + return _dynamic_dimension; + } + /** Get the value representing static dimension state + * + * @return Value representing static dimension state + * + */ + static constexpr int32_t get_static_state_value() + { + return _static_dimension; + } /** Default virtual destructor */ virtual ~ITensorInfo() = default; /** Set the data type to the specified value. 
@@ -297,6 +313,10 @@ public: return std::pair(bc_shape, bc_valid_region); } + +private: + static constexpr int32_t _dynamic_dimension = -1; + static constexpr int32_t _static_dimension = 0; }; } // namespace arm_compute #endif /*ARM_COMPUTE_TENSORINFO_H */ diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h index 42a969e01b..633daca063 100644 --- a/arm_compute/core/TensorInfo.h +++ b/arm_compute/core/TensorInfo.h @@ -293,7 +293,7 @@ public: } bool is_dynamic() const override { - return std::find(std::cbegin(_dims_state), std::cend(_dims_state), -1) != std::cend(_dims_state); + return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != std::cend(_dims_state); } ITensorInfo &set_is_resizable(bool is_resizable) override { diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index b21dc49b20..184a5959b4 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -34,6 +34,8 @@ namespace arm_compute { class ICPPKernel; +class Window; + using INEKernel = ICPPKernel; namespace experimental { @@ -63,6 +65,8 @@ public: MemoryRequirements workspace() const override; protected: + void run(ITensorPack &tensors, const Window &window); + std::unique_ptr _kernel; IRuntimeContext *_ctx; MemoryRequirements _workspace; diff --git a/src/core/IKernel.cpp b/src/core/IKernel.cpp index 287cd04931..31f1ec7a3f 100644 --- a/src/core/IKernel.cpp +++ b/src/core/IKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -48,6 +48,11 @@ BorderSize IKernel::border_size() const return BorderSize(0); } +bool IKernel::is_window_configured() const +{ + return !((_window.x().start() == _window.x().end()) && (_window.x().end() == 0)); +} + void IKernel::configure(const Window &window) { _window = window; diff --git a/src/core/Validate.cpp b/src/core/Validate.cpp index bd5e494e94..8bb507921a 100644 --- a/src/core/Validate.cpp +++ b/src/core/Validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -167,7 +167,7 @@ arm_compute::Status arm_compute::error_on_unconfigured_kernel(const char *functi const arm_compute::IKernel *kernel) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(kernel == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((kernel->window().x().start() == kernel->window().x().end()) && (kernel->window().x().end() == 0) && (kernel->window().x().step() == 0), + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(!kernel->is_window_configured(), function, file, line, "This kernel hasn't been configured."); return arm_compute::Status{}; diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp index 1ac21acbc0..23e95f72d7 100644 --- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp @@ -72,9 +72,9 @@ static ElementwiseKernel generate_kernel(UKernelType *ukernel) template std::function -configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_UNUSED(input2, output); + ARM_COMPUTE_UNUSED(src1, dst); static ElementwiseKernel kernels[] = { #if defined(__ARM_FEATURE_SVE) @@ -103,7 +103,7 @@ configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITen for(const auto &uk : kernels) { - 
if(uk.is_selected(input1->data_type())) + if(uk.is_selected(src0->data_type())) { return uk.ukernel; } @@ -113,10 +113,10 @@ configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITen } template -std::function -configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +std::function +configure_comp_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_UNUSED(input2, output); + ARM_COMPUTE_UNUSED(src1, dst); static ElementwiseKernel kernels[] = { #if defined(__ARM_FEATURE_SVE) @@ -148,7 +148,7 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso for(const auto &uk : kernels) { - if(uk.is_selected(input1->data_type())) + if(uk.is_selected(src0->data_type())) { return uk.ukernel; } @@ -158,45 +158,43 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso } } // namespace -Status CpuElementwiseKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +Status CpuElementwiseKernel::validate_arguments_common(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src0); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &src1); - const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); + const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); - // Validate in case of configured output - if(output.total_size() > 0) + // Validate in case of configured dst + if(dst.total_size() > 0) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, 
output.tensor_shape(), 0), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst.tensor_shape(), 0), "Wrong shape for output"); } return Status{}; } -void CpuElementwiseKernel::configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuElementwiseKernel::configure_common(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - - // Configure kernel window - const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - // Auto initialize output if not initialized - auto_init_if_empty(*output, out_shape, 1, input1->data_type()); + ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); - Window win = calculate_max_window(out_shape); + // If any of shapes is dynamic, expect a configured window and dst at run-time. + if(src0->is_dynamic() || src1->is_dynamic()) + { + return; + } - ICpuKernel::configure(win); + auto shape_and_window = compute_output_shape_and_window(*src0, *src1); + auto_init_if_empty(*dst, shape_and_window.first, 1, src0->data_type()); + ICpuKernel::configure(shape_and_window.second); } void CpuElementwiseKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { - ARM_COMPUTE_UNUSED(info, window); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + ARM_COMPUTE_UNUSED(info); auto src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0); auto src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -208,49 +206,49 @@ void CpuElementwiseKernel::run_op(ITensorPack &tensors, const Window &window, co } /** Arithmetic operators (min, max, squared_diff) */ -void CpuArithmeticKernel::configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuArithmeticKernel::configure(ArithmeticOperation op, const ITensorInfo *src0, const 
ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output)); - configure_common(input1, input2, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst)); + configure_common(src0, src1, dst); _op = op; } -Status CpuArithmeticKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +Status CpuArithmeticKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); - // Validate in case of configured output - if(output.total_size() > 0) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); + // Validate in case of configured dst + if(dst.total_size() > 0) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &dst); } - return validate_arguments_common(input1, input2, output); + return validate_arguments_common(src0, src1, dst); } -Status CpuArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { ARM_COMPUTE_UNUSED(op); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst)); return Status{}; } std::function -CpuArithmeticKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) 
+CpuArithmeticKernel::get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { switch(_op) { case ArithmeticOperation::MAX: - return configure_arithm_func(input1, input2, output); + return configure_arithm_func(src0, src1, dst); case ArithmeticOperation::MIN: - return configure_arithm_func(input1, input2, output); + return configure_arithm_func(src0, src1, dst); case ArithmeticOperation::SQUARED_DIFF: - return configure_arithm_func(input1, input2, output); + return configure_arithm_func(src0, src1, dst); case ArithmeticOperation::PRELU: - return configure_arithm_func(input1, input2, output); + return configure_arithm_func(src0, src1, dst); case ArithmeticOperation::DIV: - return configure_arithm_func(input1, input2, output); + return configure_arithm_func(src0, src1, dst); case ArithmeticOperation::POWER: - return configure_arithm_func(input1, input2, output); + return configure_arithm_func(src0, src1, dst); default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } @@ -259,91 +257,91 @@ CpuArithmeticKernel::get_implementation(const ITensorInfo *input1, const ITensor /** The division operator */ -void CpuDivisionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuDivisionKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output)); - configure_common(input1, input2, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst)); + configure_common(src0, src1, dst); _op = ArithmeticOperation::DIV; } -Status CpuDivisionKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +Status CpuDivisionKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32); - return 
CpuArithmeticKernel::validate_arguments(input1, input2, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::S32, DataType::F16, DataType::F32); + return CpuArithmeticKernel::validate_arguments(src0, src1, dst); } -Status CpuDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuDivisionKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst)); return Status{}; } /** The power operator */ -void CpuPowerKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuPowerKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output)); - configure_common(input1, input2, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst)); + configure_common(src0, src1, dst); _op = ArithmeticOperation::POWER; } -Status CpuPowerKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +Status CpuPowerKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32); - return CpuArithmeticKernel::validate_arguments(input1, input2, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::F16, DataType::F32); + return CpuArithmeticKernel::validate_arguments(src0, src1, dst); } -Status CpuPowerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuPowerKernel::validate(const 
ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst)); return Status{}; } /** Comparison operators (equal, not equal, less than, greater than, less than or equal, greater than or equal) */ -void CpuComparisonKernel::configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuComparisonKernel::configure(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output)); - configure_common(input1, input2, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst)); + configure_common(src0, src1, dst); _op = op; } -Status CpuComparisonKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +Status CpuComparisonKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); - // Validate in case of configured output - if(output.total_size() > 0) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); + // Validate in case of configured dst + if(dst.total_size() > 0) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst, 1, DataType::U8); } - return validate_arguments_common(input1, input2, output); + return 
validate_arguments_common(src0, src1, dst); } -Status CpuComparisonKernel::validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuComparisonKernel::validate(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { ARM_COMPUTE_UNUSED(op); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst)); return Status{}; } std::function -CpuComparisonKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +CpuComparisonKernel::get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { switch(_op) { case ComparisonOperation::Equal: - return configure_comp_func(input1, input2, output); + return configure_comp_func(src0, src1, dst); case ComparisonOperation::NotEqual: - return configure_comp_func(input1, input2, output); + return configure_comp_func(src0, src1, dst); case ComparisonOperation::Greater: - return configure_comp_func(input1, input2, output); + return configure_comp_func(src0, src1, dst); case ComparisonOperation::GreaterEqual: - return configure_comp_func(input1, input2, output); + return configure_comp_func(src0, src1, dst); case ComparisonOperation::Less: - return configure_comp_func(input1, input2, output); + return configure_comp_func(src0, src1, dst); case ComparisonOperation::LessEqual: - return configure_comp_func(input1, input2, output); + return configure_comp_func(src0, src1, dst); default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.h b/src/core/cpu/kernels/CpuElementwiseKernel.h index 92cf880172..952c6e3e25 100644 --- a/src/core/cpu/kernels/CpuElementwiseKernel.h +++ b/src/core/cpu/kernels/CpuElementwiseKernel.h 
@@ -37,7 +37,7 @@ namespace kernels /** Interface for an element-wise operation kernel * * Element-wise operation is computed by: - * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] + * @f[ dst(x,y) = OP(src0(x,y), src1(x,y))@f] * */ class CpuElementwiseKernel : public ICpuKernel @@ -53,9 +53,9 @@ public: /** Common signature for all the specialised arithmetic functions * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Dependent on subclass. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Dependent on subclass. * @param[in] window Region on which to execute the kernel. */ using ElementwiseFunction = void(const ITensor *, const ITensor *, ITensor *, const Window &); @@ -66,26 +66,26 @@ public: protected: /** Validate the argument passed to the kernel * - * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Dependent on subclass. + * @param[in] src0 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] src1 Second tensor input. Data types supported: Same as @p src0. + * @param[in] dst Output tensor. Data types supported: Dependent on subclass. */ - static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + static Status validate_arguments_common(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst); /** Commmon configure function for element-wise operators with no additional options (e.g. 
Min, Max, SquaredDiff) * */ - void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure_common(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Function to get the micro kernel implementation * - * @param[in] input1 First input tensor information - * @param[in] input2 Second input tensor information - * @param[in] output Output tensor information + * @param[in] src0 First input tensor information + * @param[in] src1 Second input tensor information + * @param[in] dst Output tensor information * * @return the function instance for the micro kernel */ - virtual std::function get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) = 0; + virtual std::function get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) = 0; }; class CpuArithmeticKernel : public CpuElementwiseKernel @@ -96,40 +96,40 @@ public: /** Configure kernel * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] op Arithmetic operation to be executed. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. 
*/ - void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] op Arithmetic operation to be executed. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. * * @return a Status */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); protected: // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst); ArithmeticOperation _op{}; private: /** Function to get the micro kernel implementation * - * @param[in] input1 First input tensor information - * @param[in] input2 Second input tensor information - * @param[in] output Output tensor information + * @param[in] src0 First input tensor information + * @param[in] src1 Second input tensor information + * @param[in] dst Output tensor information * * @return the function instance for the micro kernel */ - std::function 
get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) override; + std::function get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) override; }; class CpuDivisionKernel : public CpuArithmeticKernel @@ -140,25 +140,25 @@ public: /** Configure kernel * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref CpuDivisionKernel * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. 
* * @return a Status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); protected: // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst); }; class CpuPowerKernel : public CpuArithmeticKernel @@ -169,25 +169,25 @@ public: /** Configure kernel * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: F16/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref CpuPowerKernel * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: F16/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. 
* * @return a Status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); protected: // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst); }; class CpuComparisonKernel : public CpuElementwiseKernel @@ -198,38 +198,38 @@ public: /** Configure kernel * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U8. + * @param[in] op Comparison operation to be executed. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: U8. */ - void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U8. + * @param[in] op Comparison operation to be executed. 
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: U8. * * @return a Status */ - static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); protected: // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst); private: /** Function to get the micro kernel implementation * - * @param[in] input1 First input tensor information - * @param[in] input2 Second input tensor information - * @param[in] output Output tensor information + * @param[in] src0 First input tensor information + * @param[in] src1 Second input tensor information + * @param[in] dst Output tensor information * * @return the function instance for the micro kernel */ - std::function get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) override; + std::function get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) override; ComparisonOperation _op{}; }; diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp index 2b5c11f8e1..ff2d080c95 100644 --- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -108,28 +108,28 @@ CpuElementwiseUnaryKernel::CpuElementwiseUnaryKernel() { } -void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output) +void 
CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst) { - ARM_COMPUTE_ERROR_THROW_ON(validate(op, input, output)); - - // Configure kernel window - const TensorShape &out_shape = TensorShape::broadcast_shape(input.tensor_shape()); - - // Auto initialize output if not initialized - auto_init_if_empty(output, out_shape, 1, input.data_type()); - - Window win = calculate_max_window(out_shape); + ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst)); _op = op; - ICpuKernel::configure(win); + // If input shape is dynamic, expect a configured window and dst at run-time. + if(src.is_dynamic()) + { + return; + } + + auto shape_and_window = compute_output_shape_and_window(src); + auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type()); + ICpuKernel::configure(shape_and_window.second); } -Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output) +Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src); - const auto *uk = get_implementation(input.data_type()); + const auto *uk = get_implementation(src.data_type()); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); switch(op) @@ -139,19 +139,19 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf case ElementWiseUnary::LOG: case ElementWiseUnary::ROUND: case ElementWiseUnary::SIN: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32); break; case ElementWiseUnary::NEG: case ElementWiseUnary::ABS: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32, DataType::S32); + 
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32); break; default: ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported"); } - // Validate in case of configured output - if(output.total_size() > 0) + // Validate in case of configured dst + if(dst.total_size() > 0) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst); } return Status{}; @@ -160,8 +160,6 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); auto src = tensors.get_const_tensor(TensorType::ACL_SRC); auto dst = tensors.get_tensor(TensorType::ACL_DST); diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h index 193f6f1e4f..ceb90dcf70 100644 --- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h +++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h @@ -38,7 +38,7 @@ namespace kernels /** Interface for an element-wise unary operation kernel * * Element-wise operation is computed by: - * @f[ output(x) = OP(input(x))@f] + * @f[ dst(x) = OP(src(x))@f] * */ class CpuElementwiseUnaryKernel : public ICpuKernel @@ -56,21 +56,21 @@ public: /** Function to configure the @ref CpuElementwiseUnaryKernel * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. - * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] op Arithmetic operation to be executed. + * @param[in] src First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. 
+ * @param[out] dst Output tensor. Data types supported: Same as @p src. */ - void configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output); + void configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst); /** Static function to check if given info will lead to a valid configuration of @ref CpuElementwiseUnaryKernel * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. - * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Arithmetic operation to be executed. + * @param[in] src First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[in] dst Output tensor info. Data types supported: Same as @p src. * * @return a Status */ - static Status validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output); + static Status validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst); // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h index 9216c33f16..637e9323ab 100644 --- a/src/core/helpers/WindowHelpers.h +++ b/src/core/helpers/WindowHelpers.h @@ -177,6 +177,22 @@ inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps { return calculate_max_enlarged_window(info.valid_region(), steps, border_size); } + +/** Function to compute the shape of output and window for the given inputs + * + * @param[in] infos Input tensor informations + * + * @return A pair of the shape and window + */ +template +std::pair compute_output_shape_and_window(const Infos &... 
infos) +{ + const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(infos...); + const TensorShape &out_shape = broadcast_pair.first; + const ValidRegion &valid_region = broadcast_pair.second; + + return std::make_pair(out_shape, calculate_max_window(valid_region)); +} #endif /* DOXYGEN_SKIP_THIS */ } // namespace arm_compute diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp index ccee8ffc21..a5fc0a2726 100644 --- a/src/runtime/NEON/INEOperator.cpp +++ b/src/runtime/NEON/INEOperator.cpp @@ -44,7 +44,12 @@ void INEOperator::run(ITensorPack &tensors) ARM_COMPUTE_ERROR("No inputs provided"); } - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors); + run(tensors, _kernel->window()); +} + +void INEOperator::run(ITensorPack &tensors, const Window &window) +{ + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, window, tensors); } void INEOperator::prepare(ITensorPack &constants) diff --git a/src/runtime/cpu/operators/CpuElementwise.cpp b/src/runtime/cpu/operators/CpuElementwise.cpp index 322bd09c43..b5c8dde925 100644 --- a/src/runtime/cpu/operators/CpuElementwise.cpp +++ b/src/runtime/cpu/operators/CpuElementwise.cpp @@ -23,95 +23,111 @@ */ #include "src/runtime/cpu/operators/CpuElementwise.h" #include "src/core/cpu/kernels/CpuElementwiseKernel.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { namespace cpu { -void CpuElementwiseMax::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuElementwiseBase::run(ITensorPack &tensors) +{ + // If the kernel has been configured, use the window from the kernel. 
+ if(_kernel->is_window_configured()) + { + ICpuOperator::run(tensors); + return; + } + + auto src0_info = tensors.get_const_tensor(TensorType::ACL_SRC_0)->info(); + auto src1_info = tensors.get_const_tensor(TensorType::ACL_SRC_1)->info(); + auto shape_and_window = compute_output_shape_and_window(*src0_info, *src1_info); + ICpuOperator::run(tensors, shape_and_window.second); +} + +void CpuElementwiseMax::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { auto k = std::make_unique(); - k->configure(ArithmeticOperation::MAX, input1, input2, output); + k->configure(ArithmeticOperation::MAX, src0, src1, dst); _kernel = std::move(k); } -Status CpuElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuElementwiseMax::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MAX, input1, input2, output); + return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MAX, src0, src1, dst); } -void CpuElementwiseMin::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuElementwiseMin::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { auto k = std::make_unique(); - k->configure(ArithmeticOperation::MIN, input1, input2, output); + k->configure(ArithmeticOperation::MIN, src0, src1, dst); _kernel = std::move(k); } -Status CpuElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuElementwiseMin::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MIN, input1, input2, output); + return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MIN, src0, src1, dst); } -void CpuElementwiseSquaredDiff::configure(const ITensorInfo *input1, const ITensorInfo 
*input2, ITensorInfo *output) +void CpuElementwiseSquaredDiff::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { auto k = std::make_unique(); - k->configure(ArithmeticOperation::SQUARED_DIFF, input1, input2, output); + k->configure(ArithmeticOperation::SQUARED_DIFF, src0, src1, dst); _kernel = std::move(k); } -Status CpuElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuElementwiseSquaredDiff::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output); + return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src0, src1, dst); } -void CpuElementwiseDivision::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuElementwiseDivision::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { auto k = std::make_unique(); - k->configure(input1, input2, output); + k->configure(src0, src1, dst); _kernel = std::move(k); } -Status CpuElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuElementwiseDivision::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - return kernels::CpuDivisionKernel::validate(input1, input2, output); + return kernels::CpuDivisionKernel::validate(src0, src1, dst); } -void CpuElementwisePower::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuElementwisePower::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { auto k = std::make_unique(); - k->configure(input1, input2, output); + k->configure(src0, src1, dst); _kernel = std::move(k); } -Status CpuElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status 
CpuElementwisePower::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - return kernels::CpuPowerKernel::validate(input1, input2, output); + return kernels::CpuPowerKernel::validate(src0, src1, dst); } template -void CpuElementwiseComparisonStatic::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) +void CpuElementwiseComparisonStatic::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) { auto k = std::make_unique(); - k->configure(COP, input1, input2, output); + k->configure(COP, src0, src1, dst); _kernel = std::move(k); } template -Status CpuElementwiseComparisonStatic::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CpuElementwiseComparisonStatic::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { - return kernels::CpuComparisonKernel::validate(COP, input1, input2, output); + return kernels::CpuComparisonKernel::validate(COP, src0, src1, dst); } -void CpuElementwiseComparison::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op) +void CpuElementwiseComparison::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ComparisonOperation op) { auto k = std::make_unique(); - k->configure(op, input1, input2, output); + k->configure(op, src0, src1, dst); _kernel = std::move(k); } -Status CpuElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op) +Status CpuElementwiseComparison::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ComparisonOperation op) { - return kernels::CpuComparisonKernel::validate(op, input1, input2, output); + return kernels::CpuComparisonKernel::validate(op, src0, src1, dst); } // Supported Specializations diff --git a/src/runtime/cpu/operators/CpuElementwise.h 
b/src/runtime/cpu/operators/CpuElementwise.h index 611a374c26..4b350d5f9f 100644 --- a/src/runtime/cpu/operators/CpuElementwise.h +++ b/src/runtime/cpu/operators/CpuElementwise.h @@ -30,30 +30,36 @@ namespace arm_compute { namespace cpu { +class CpuElementwiseBase : public ICpuOperator +{ +public: + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a max operation between two tensors. */ -class CpuElementwiseMax : public ICpuOperator +class CpuElementwiseMax : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. 
Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); }; /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min @@ -61,25 +67,25 @@ public: * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a min operation between two tensors. */ -class CpuElementwiseMin : public ICpuOperator +class CpuElementwiseMin : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min * - * @param[in] input1 First tensor input info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); }; /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference @@ -87,25 +93,25 @@ public: * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2 */ -class CpuElementwiseSquaredDiff : public ICpuOperator +class CpuElementwiseSquaredDiff : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. 
*/ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); }; /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division @@ -113,25 +119,25 @@ public: * @note The tensor data type for the inputs must be S32/F16/F32. * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i]) */ -class CpuElementwiseDivision : public ICpuOperator +class CpuElementwiseDivision : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in, out] src0 First tensor input info. Data types supported: S32/F16/F32. 
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); }; /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power @@ -140,25 +146,25 @@ public: * @note The function performs a elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) * @note For an exponent that is a float, this function will only work with a positive base. */ -class CpuElementwisePower : public ICpuOperator +class CpuElementwisePower : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: F16/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. 
Data types supported: Same as @p input1. + * @param[in, out] src0 First tensor input info. Data types supported: F16/F32. + * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: Same as @p src0. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] src0 First tensor input info. Data types supported: F16/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: Same as @p src0. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); }; /** Basic function to run @ref cpu::kernels::CpuComparisonKernel. @@ -166,27 +172,27 @@ public: * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a comparison operation between two tensors. */ -class CpuElementwiseComparison : public ICpuOperator +class CpuElementwiseComparison : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. 
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U16/U32. - * @param[in] op Comparison Operation to be performed. + * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: U16/U32. + * @param[in] op Comparison Operation to be performed. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ComparisonOperation op); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. - * @param[in] op Comparison Operation to be performed. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: U16/U32. + * @param[in] op Comparison Operation to be performed. 
* * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ComparisonOperation op); }; /** Basic function to run @ref cpu::kernels::CpuComparisonKernel @@ -195,25 +201,25 @@ public: * @note The function performs a comparison operation between two tensors. */ template -class CpuElementwiseComparisonStatic : public ICpuOperator +class CpuElementwiseComparisonStatic : public CpuElementwiseBase { public: - /** Initialise the kernel's inputs, output and conversion policy. + /** Initialise the kernel's inputs, dst and conversion policy. * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U16/U32. + * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[out] dst Output tensor info. Data types supported: U16/U32. */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst); /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. + * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. 
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0. + * @param[in] dst Output tensor info. Data types supported: U16/U32. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); }; /** Basic function to run equal comparison. */ diff --git a/src/runtime/cpu/operators/CpuElementwiseUnary.cpp b/src/runtime/cpu/operators/CpuElementwiseUnary.cpp index d1b1700927..2140c5cf78 100644 --- a/src/runtime/cpu/operators/CpuElementwiseUnary.cpp +++ b/src/runtime/cpu/operators/CpuElementwiseUnary.cpp @@ -23,6 +23,7 @@ */ #include "src/runtime/cpu/operators/CpuElementwiseUnary.h" #include "src/core/cpu/kernels/CpuElementwiseUnaryKernel.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { @@ -41,5 +42,17 @@ Status CpuElementwiseUnary::validate(ElementWiseUnary op, const ITensorInfo &src { return KernelType::validate(op, src, dst); } + +void CpuElementwiseUnary::run(ITensorPack &tensors) +{ + if(_kernel->is_window_configured()) + { + ICpuOperator::run(tensors); + return; + } + + auto src_info = tensors.get_const_tensor(TensorType::ACL_SRC)->info(); + ICpuOperator::run(tensors, compute_output_shape_and_window(*src_info).second); +} } // namespace cpu } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/cpu/operators/CpuElementwiseUnary.h b/src/runtime/cpu/operators/CpuElementwiseUnary.h index 0b2a9e730d..721ba2a85b 100644 --- a/src/runtime/cpu/operators/CpuElementwiseUnary.h +++ b/src/runtime/cpu/operators/CpuElementwiseUnary.h @@ -50,6 +50,9 @@ public: * @return a status */ static Status validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; }; } // namespace cpu diff --git a/tests/Utils.h b/tests/Utils.h index 
2569c41a9e..fe9fe712cf 100644 --- a/tests/Utils.h +++ b/tests/Utils.h @@ -814,6 +814,59 @@ inline void sync_tensor_if_necessary(TensorType &tensor) { ARM_COMPUTE_UNUSED(tensor); } + +/** Construct and return object for dimensions' state filled with the given value + * + * @param[in] value The value to fill + * + * @return Constructed class + */ +inline ITensorInfo::TensorDimsState construct_dims_state(int32_t value) +{ + auto states = ITensorInfo::TensorDimsState{}; + std::fill(states.begin(), states.end(), value); + return states; +} + +/** Construct and return object for dimensions' state filled with the value for dynamic state + * + * @return Constructed class filled with the value for dynamic state + */ +inline ITensorInfo::TensorDimsState construct_dynamic_dims_state() +{ + return construct_dims_state(ITensorInfo::get_dynamic_state_value()); +} + +/** Construct and return object for dimensions' state filled with the value for non-dynamic state + * + * @return Constructed class filled with the value for non-dynamic state + */ +inline ITensorInfo::TensorDimsState construct_static_dims_state() +{ + return construct_dims_state(ITensorInfo::get_static_state_value()); +} + +/** Set the dimension states of the given tensor to dynamic + * + * @param[in] t The tensor to set to dynamic state + * + */ +template +void set_tensor_dynamic(TensorType &t) +{ + t.info()->set_tensor_dims_state(construct_dynamic_dims_state()); +} + +/** Set the dimension states of the given tensor to static + * + * @param[in] t The tensor to set to static state + * + */ +template +void set_tensor_static(TensorType &t) +{ + t.info()->set_tensor_dims_state(construct_static_dims_state()); +} } // namespace test } // namespace arm_compute #endif /* ARM_COMPUTE_TEST_UTILS_H */ diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp index 3656560281..8abccb2ed6 100644 --- a/tests/validation/NEON/ElementwiseDivision.cpp +++
b/tests/validation/NEON/ElementwiseDivision.cpp @@ -93,6 +93,34 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // clang-format on // *INDENT-ON* +// These test cases will execute the function with dynamic-stated shapes +// Since other elementwise operations share the same kernel, these tests are added only here. +// Also, only FP32 is tested since data type doesn't/shouldn't matter with dynamic shapes. +TEST_SUITE(DynamicShape) +template +using CpuElementwiseDivisionDynamicShapeFixture = ArithmeticDivisionDynamicShapeValidationFixture; + +template +using CpuElementwiseDivisionBroadcastDynamicShapeFixture = ArithmeticDivisionBroadcastDynamicShapeValidationFixture; + +TEST_SUITE(F32) + +FIXTURE_DATA_TEST_CASE(RunSmall, CpuElementwiseDivisionDynamicShapeFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CpuElementwiseDivisionBroadcastDynamicShapeFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(), + ElementwiseDivisionFP32Dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); +} + +TEST_SUITE_END() // F32 +TEST_SUITE_END() // DynamicShape + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) diff --git a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp index f41500cc0b..1591b76cd7 100644 --- a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp +++ b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp @@ -50,6 +50,25 @@ RelativeTolerance tolerance_fp16(0.01f); TEST_SUITE(NEON) TEST_SUITE(RsqrtLayer) +// These test cases will execute the function with dynamic-stated shapes +// Since other elementwise unary operations share the same kernel, these tests are added only here.
+// Also, only FP32 is tested since data type doesn't/shouldn't matter with dynamic shapes. +TEST_SUITE(DynamicShape) +TEST_SUITE(FP32) + +template +using CpuRsqrtDynamicShapeFixture = RsqrtDynamicShapeValidationFixture; + +FIXTURE_DATA_TEST_CASE(RunSmall, CpuRsqrtDynamicShapeFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // DynamicShape + template using NERsqrtLayerFixture = RsqrtValidationFixture; diff --git a/tests/validation/fixtures/ElementWiseUnaryFixture.h b/tests/validation/fixtures/ElementWiseUnaryFixture.h index 8cffef48f6..9729907630 100644 --- a/tests/validation/fixtures/ElementWiseUnaryFixture.h +++ b/tests/validation/fixtures/ElementWiseUnaryFixture.h @@ -44,11 +44,12 @@ class ElementWiseUnaryValidationFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op) + void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op, bool use_dynamic_shape = false) { - _op = op; - _target = compute_target(input_shape, input_data_type, in_place); - _reference = compute_reference(input_shape, input_data_type); + _op = op; + _use_dynamic_shape = use_dynamic_shape; // set before compute_target() so the dynamic-shape branch is exercised + _target = compute_target(input_shape, input_data_type, in_place); + _reference = compute_reference(input_shape, input_data_type); } protected: @@ -131,10 +132,24 @@ protected: TensorType *actual_dst = in_place ? &src : &dst; + // if _use_dynamic_shape is true, this fixture will test scenario for dynamic shapes.
+ // - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic() + // - After configure, tensors are marked as static for run using set_tensor_static() + // - The tensors with static shape are given to run() + if(_use_dynamic_shape) + { + set_tensor_dynamic(src); + } + // Create and configure function FunctionType elwiseunary_layer; elwiseunary_layer.configure(&src, actual_dst); + if(_use_dynamic_shape) + { + set_tensor_static(src); + } + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); src.allocator()->allocate(); ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -175,6 +190,7 @@ protected: TensorType _target{}; SimpleTensor _reference{}; ElementWiseUnary _op{}; + bool _use_dynamic_shape{ false }; }; template @@ -188,6 +204,17 @@ public: } }; +template +class RsqrtDynamicShapeValidationFixture : public ElementWiseUnaryValidationFixture +{ +public: + template + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture::setup(shape, data_type, false, ElementWiseUnary::RSQRT, true); + } +}; + template class ExpValidationFixture : public ElementWiseUnaryValidationFixture { diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h index dcb408c801..bf51c7e69b 100644 --- a/tests/validation/fixtures/ElementwiseOperationsFixture.h +++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -48,9 +48,11 @@ public: template void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool use_dyanmic_shape = false) { - _op = op; + _op = op; + _use_dynamic_shape = use_dyanmic_shape; + _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); } @@ -87,10 +89,26 @@ protected: TensorType ref_src2 = create_tensor(shape1, data_type1, 1, qinfo1); TensorType dst = create_tensor(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out); + // if _use_dynamic_shape is true, this fixture will test scenario for dynamic shapes. 
+ // - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic() + // - After configure, tensors are marked as static for run using set_tensor_static() + // - The tensors with static shape are given to run() + if(_use_dynamic_shape) + { + set_tensor_dynamic(ref_src1); + set_tensor_dynamic(ref_src2); + } + // Create and configure function FunctionType elem_op; elem_op.configure(&ref_src1, &ref_src2, &dst); + if(_use_dynamic_shape) + { + set_tensor_static(ref_src1); + set_tensor_static(ref_src2); + } + ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -133,6 +151,7 @@ protected: TensorType _target{}; SimpleTensor _reference{}; ArithmeticOperation _op{ ArithmeticOperation::ADD }; + bool _use_dynamic_shape{ false }; }; // Arithmetic operation fused with activation function @@ -225,6 +244,32 @@ public: } }; +template +class ArithmeticDivisionBroadcastDynamicShapeValidationFixture : public ArithmeticOperationsGenericFixture +{ +public: + template + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) + { + ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::DIV, shape0, shape1, + data_type0, data_type1, output_data_type, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); + } +}; + +template +class ArithmeticDivisionDynamicShapeValidationFixture : public ArithmeticOperationsGenericFixture +{ +public: + template + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) + { + ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::DIV, shape, shape, + data_type0, data_type1, output_data_type, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); + } +}; + template class 
ArithmeticDivisionBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture { -- cgit v1.2.1