about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core/IKernel.cpp 7
-rw-r--r-- src/core/Validate.cpp 4
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseKernel.cpp 164
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseKernel.h 122
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp 40
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.h 18
-rw-r--r-- src/core/helpers/WindowHelpers.h 16
-rw-r--r-- src/runtime/NEON/INEOperator.cpp 7
-rw-r--r-- src/runtime/cpu/operators/CpuElementwise.cpp 72
-rw-r--r-- src/runtime/cpu/operators/CpuElementwise.h 150
-rw-r--r-- src/runtime/cpu/operators/CpuElementwiseUnary.cpp 13
-rw-r--r-- src/runtime/cpu/operators/CpuElementwiseUnary.h 3
12 files changed, 338 insertions, 278 deletions
diff --git a/src/core/IKernel.cpp b/src/core/IKernel.cpp
index 287cd04931..31f1ec7a3f 100644
--- a/src/core/IKernel.cpp
+++ b/src/core/IKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,11 @@ BorderSize IKernel::border_size() const
return BorderSize(0);
}
+bool IKernel::is_window_configured() const
+{
+ return !((_window.x().start() == _window.x().end()) && (_window.x().end() == 0));
+}
+
void IKernel::configure(const Window &window)
{
_window = window;
diff --git a/src/core/Validate.cpp b/src/core/Validate.cpp
index bd5e494e94..8bb507921a 100644
--- a/src/core/Validate.cpp
+++ b/src/core/Validate.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -167,7 +167,7 @@ arm_compute::Status arm_compute::error_on_unconfigured_kernel(const char *functi
const arm_compute::IKernel *kernel)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(kernel == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((kernel->window().x().start() == kernel->window().x().end()) && (kernel->window().x().end() == 0) && (kernel->window().x().step() == 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(!kernel->is_window_configured(),
function, file, line,
"This kernel hasn't been configured.");
return arm_compute::Status{};
diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
index 1ac21acbc0..23e95f72d7 100644
--- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
@@ -72,9 +72,9 @@ static ElementwiseKernel generate_kernel(UKernelType *ukernel)
template <ArithmeticOperation op>
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
-configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_UNUSED(input2, output);
+ ARM_COMPUTE_UNUSED(src1, dst);
static ElementwiseKernel kernels[] =
{
#if defined(__ARM_FEATURE_SVE)
@@ -103,7 +103,7 @@ configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITen
for(const auto &uk : kernels)
{
- if(uk.is_selected(input1->data_type()))
+ if(uk.is_selected(src0->data_type()))
{
return uk.ukernel;
}
@@ -113,10 +113,10 @@ configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITen
}
template <ComparisonOperation op>
-std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)>
-configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
+configure_comp_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_UNUSED(input2, output);
+ ARM_COMPUTE_UNUSED(src1, dst);
static ElementwiseKernel kernels[] =
{
#if defined(__ARM_FEATURE_SVE)
@@ -148,7 +148,7 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso
for(const auto &uk : kernels)
{
- if(uk.is_selected(input1->data_type()))
+ if(uk.is_selected(src0->data_type()))
{
return uk.ukernel;
}
@@ -158,45 +158,43 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso
}
} // namespace
-Status CpuElementwiseKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuElementwiseKernel::validate_arguments_common(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src0);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &src1);
- const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+ const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- // Validate in case of configured output
- if(output.total_size() > 0)
+ // Validate in case of configured dst
+ if(dst.total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst.tensor_shape(), 0),
"Wrong shape for output");
}
return Status{};
}
-void CpuElementwiseKernel::configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseKernel::configure_common(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
-
- // Configure kernel window
- const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
- // Auto initialize output if not initialized
- auto_init_if_empty(*output, out_shape, 1, input1->data_type());
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
- Window win = calculate_max_window(out_shape);
+ // If any of shapes is dynamic, expect a configured window and dst at run-time.
+ if(src0->is_dynamic() || src1->is_dynamic())
+ {
+ return;
+ }
- ICpuKernel::configure(win);
+ auto shape_and_window = compute_output_shape_and_window(*src0, *src1);
+ auto_init_if_empty(*dst, shape_and_window.first, 1, src0->data_type());
+ ICpuKernel::configure(shape_and_window.second);
}
void CpuElementwiseKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
- ARM_COMPUTE_UNUSED(info, window);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_UNUSED(info);
auto src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
auto src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -208,49 +206,49 @@ void CpuElementwiseKernel::run_op(ITensorPack &tensors, const Window &window, co
}
/** Arithmetic operators (min, max, squared_diff) */
-void CpuArithmeticKernel::configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuArithmeticKernel::configure(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
- configure_common(input1, input2, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst));
+ configure_common(src0, src1, dst);
_op = op;
}
-Status CpuArithmeticKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuArithmeticKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
- // Validate in case of configured output
- if(output.total_size() > 0)
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
+ // Validate in case of configured dst
+ if(dst.total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &dst);
}
- return validate_arguments_common(input1, input2, output);
+ return validate_arguments_common(src0, src1, dst);
}
-Status CpuArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
ARM_COMPUTE_UNUSED(op);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst));
return Status{};
}
std::function<CpuElementwiseKernel::ElementwiseFunction>
-CpuArithmeticKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+CpuArithmeticKernel::get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
switch(_op)
{
case ArithmeticOperation::MAX:
- return configure_arithm_func<ArithmeticOperation::MAX>(input1, input2, output);
+ return configure_arithm_func<ArithmeticOperation::MAX>(src0, src1, dst);
case ArithmeticOperation::MIN:
- return configure_arithm_func<ArithmeticOperation::MIN>(input1, input2, output);
+ return configure_arithm_func<ArithmeticOperation::MIN>(src0, src1, dst);
case ArithmeticOperation::SQUARED_DIFF:
- return configure_arithm_func<ArithmeticOperation::SQUARED_DIFF>(input1, input2, output);
+ return configure_arithm_func<ArithmeticOperation::SQUARED_DIFF>(src0, src1, dst);
case ArithmeticOperation::PRELU:
- return configure_arithm_func<ArithmeticOperation::PRELU>(input1, input2, output);
+ return configure_arithm_func<ArithmeticOperation::PRELU>(src0, src1, dst);
case ArithmeticOperation::DIV:
- return configure_arithm_func<ArithmeticOperation::DIV>(input1, input2, output);
+ return configure_arithm_func<ArithmeticOperation::DIV>(src0, src1, dst);
case ArithmeticOperation::POWER:
- return configure_arithm_func<ArithmeticOperation::POWER>(input1, input2, output);
+ return configure_arithm_func<ArithmeticOperation::POWER>(src0, src1, dst);
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
}
@@ -259,91 +257,91 @@ CpuArithmeticKernel::get_implementation(const ITensorInfo *input1, const ITensor
/** The division operator */
-void CpuDivisionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuDivisionKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
- configure_common(input1, input2, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst));
+ configure_common(src0, src1, dst);
_op = ArithmeticOperation::DIV;
}
-Status CpuDivisionKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuDivisionKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);
- return CpuArithmeticKernel::validate_arguments(input1, input2, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::S32, DataType::F16, DataType::F32);
+ return CpuArithmeticKernel::validate_arguments(src0, src1, dst);
}
-Status CpuDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuDivisionKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst));
return Status{};
}
/** The power operator */
-void CpuPowerKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuPowerKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
- configure_common(input1, input2, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst));
+ configure_common(src0, src1, dst);
_op = ArithmeticOperation::POWER;
}
-Status CpuPowerKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuPowerKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);
- return CpuArithmeticKernel::validate_arguments(input1, input2, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::F16, DataType::F32);
+ return CpuArithmeticKernel::validate_arguments(src0, src1, dst);
}
-Status CpuPowerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuPowerKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst));
return Status{};
}
/** Comparison operators (equal, not equal, less than, greater than, less than or equal, greater than or equal) */
-void CpuComparisonKernel::configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuComparisonKernel::configure(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
- configure_common(input1, input2, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst));
+ configure_common(src0, src1, dst);
_op = op;
}
-Status CpuComparisonKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuComparisonKernel::validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
- // Validate in case of configured output
- if(output.total_size() > 0)
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
+ // Validate in case of configured dst
+ if(dst.total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst, 1, DataType::U8);
}
- return validate_arguments_common(input1, input2, output);
+ return validate_arguments_common(src0, src1, dst);
}
-Status CpuComparisonKernel::validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuComparisonKernel::validate(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
ARM_COMPUTE_UNUSED(op);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst));
return Status{};
}
std::function<CpuElementwiseKernel::ElementwiseFunction>
-CpuComparisonKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+CpuComparisonKernel::get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
switch(_op)
{
case ComparisonOperation::Equal:
- return configure_comp_func<ComparisonOperation::Equal>(input1, input2, output);
+ return configure_comp_func<ComparisonOperation::Equal>(src0, src1, dst);
case ComparisonOperation::NotEqual:
- return configure_comp_func<ComparisonOperation::NotEqual>(input1, input2, output);
+ return configure_comp_func<ComparisonOperation::NotEqual>(src0, src1, dst);
case ComparisonOperation::Greater:
- return configure_comp_func<ComparisonOperation::Greater>(input1, input2, output);
+ return configure_comp_func<ComparisonOperation::Greater>(src0, src1, dst);
case ComparisonOperation::GreaterEqual:
- return configure_comp_func<ComparisonOperation::GreaterEqual>(input1, input2, output);
+ return configure_comp_func<ComparisonOperation::GreaterEqual>(src0, src1, dst);
case ComparisonOperation::Less:
- return configure_comp_func<ComparisonOperation::Less>(input1, input2, output);
+ return configure_comp_func<ComparisonOperation::Less>(src0, src1, dst);
case ComparisonOperation::LessEqual:
- return configure_comp_func<ComparisonOperation::LessEqual>(input1, input2, output);
+ return configure_comp_func<ComparisonOperation::LessEqual>(src0, src1, dst);
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
}
diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.h b/src/core/cpu/kernels/CpuElementwiseKernel.h
index 92cf880172..952c6e3e25 100644
--- a/src/core/cpu/kernels/CpuElementwiseKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.h
@@ -37,7 +37,7 @@ namespace kernels
/** Interface for an element-wise operation kernel
*
* Element-wise operation is computed by:
- * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
+ * @f[ dst(x,y) = OP(src0(x,y), src1(x,y))@f]
*
*/
class CpuElementwiseKernel : public ICpuKernel
@@ -53,9 +53,9 @@ public:
/** Common signature for all the specialised arithmetic functions
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Dependent on subclass.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Dependent on subclass.
* @param[in] window Region on which to execute the kernel.
*/
using ElementwiseFunction = void(const ITensor *, const ITensor *, ITensor *, const Window &);
@@ -66,26 +66,26 @@ public:
protected:
/** Validate the argument passed to the kernel
*
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Dependent on subclass.
+ * @param[in] src0 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor. Data types supported: Dependent on subclass.
*/
- static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+ static Status validate_arguments_common(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst);
/** Commmon configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
*
*/
- void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure_common(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Function to get the micro kernel implementation
*
- * @param[in] input1 First input tensor information
- * @param[in] input2 Second input tensor information
- * @param[in] output Output tensor information
+ * @param[in] src0 First input tensor information
+ * @param[in] src1 Second input tensor information
+ * @param[in] dst Output tensor information
*
* @return the function instance for the micro kernel
*/
- virtual std::function<ElementwiseFunction> get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) = 0;
+ virtual std::function<ElementwiseFunction> get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) = 0;
};
class CpuArithmeticKernel : public CpuElementwiseKernel
@@ -96,40 +96,40 @@ public:
/** Configure kernel
*
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel
*
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a Status
*/
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(ArithmeticOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
protected:
// Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+ static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst);
ArithmeticOperation _op{};
private:
/** Function to get the micro kernel implementation
*
- * @param[in] input1 First input tensor information
- * @param[in] input2 Second input tensor information
- * @param[in] output Output tensor information
+ * @param[in] src0 First input tensor information
+ * @param[in] src1 Second input tensor information
+ * @param[in] dst Output tensor information
*
* @return the function instance for the micro kernel
*/
- std::function<ElementwiseFunction> get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) override;
+ std::function<ElementwiseFunction> get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) override;
};
class CpuDivisionKernel : public CpuArithmeticKernel
@@ -140,25 +140,25 @@ public:
/** Configure kernel
*
- * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: S32/F16/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref CpuDivisionKernel
*
- * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: S32/F16/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
protected:
// Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+ static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst);
};
class CpuPowerKernel : public CpuArithmeticKernel
@@ -169,25 +169,25 @@ public:
/** Configure kernel
*
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: F16/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref CpuPowerKernel
*
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: F16/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
protected:
// Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+ static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst);
};
class CpuComparisonKernel : public CpuElementwiseKernel
@@ -198,38 +198,38 @@ public:
/** Configure kernel
*
- * @param[in] op Comparison operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: U8.
+ * @param[in] op Comparison operation to be executed.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: U8.
*/
- void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
- * @param[in] op Comparison operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U8.
+ * @param[in] op Comparison operation to be executed.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: U8.
*
* @return a Status
*/
- static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(ComparisonOperation op, const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
protected:
// Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+ static Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst);
private:
/** Function to get the micro kernel implementation
*
- * @param[in] input1 First input tensor information
- * @param[in] input2 Second input tensor information
- * @param[in] output Output tensor information
+ * @param[in] src0 First input tensor information
+ * @param[in] src1 Second input tensor information
+ * @param[in] dst Output tensor information
*
* @return the function instance for the micro kernel
*/
- std::function<ElementwiseFunction> get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) override;
+ std::function<ElementwiseFunction> get_implementation(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) override;
ComparisonOperation _op{};
};
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 2b5c11f8e1..ff2d080c95 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -108,28 +108,28 @@ CpuElementwiseUnaryKernel::CpuElementwiseUnaryKernel()
{
}
-void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output)
+void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
{
- ARM_COMPUTE_ERROR_THROW_ON(validate(op, input, output));
-
- // Configure kernel window
- const TensorShape &out_shape = TensorShape::broadcast_shape(input.tensor_shape());
-
- // Auto initialize output if not initialized
- auto_init_if_empty(output, out_shape, 1, input.data_type());
-
- Window win = calculate_max_window(out_shape);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
_op = op;
- ICpuKernel::configure(win);
+ // If input shape is dynamic, expect a configured window and dst at run-time.
+ if(src.is_dynamic())
+ {
+ return;
+ }
+
+ auto shape_and_window = compute_output_shape_and_window(src);
+ auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type());
+ ICpuKernel::configure(shape_and_window.second);
}
-Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output)
+Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src);
- const auto *uk = get_implementation(input.data_type());
+ const auto *uk = get_implementation(src.data_type());
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
switch(op)
@@ -139,19 +139,19 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf
case ElementWiseUnary::LOG:
case ElementWiseUnary::ROUND:
case ElementWiseUnary::SIN:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32);
break;
case ElementWiseUnary::NEG:
case ElementWiseUnary::ABS:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32);
break;
default:
ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported");
}
- // Validate in case of configured output
- if(output.total_size() > 0)
+ // Validate in case of configured dst
+ if(dst.total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
}
return Status{};
@@ -160,8 +160,6 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf
void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
index 193f6f1e4f..ceb90dcf70 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -38,7 +38,7 @@ namespace kernels
/** Interface for an element-wise unary operation kernel
*
* Element-wise operation is computed by:
- * @f[ output(x) = OP(input(x))@f]
+ * @f[ dst(x) = OP(src(x))@f]
*
*/
class CpuElementwiseUnaryKernel : public ICpuKernel
@@ -56,21 +56,21 @@ public:
/** Function to configure the @ref CpuElementwiseUnaryKernel
*
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] src First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+ * @param[out] dst Output tensor. Data types supported: Same as @p src.
*/
- void configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output);
+ void configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst);
/** Static function to check if given info will lead to a valid configuration of @ref CpuElementwiseUnaryKernel
*
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] src First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src.
*
* @return a Status
*/
- static Status validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output);
+ static Status validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h
index 9216c33f16..637e9323ab 100644
--- a/src/core/helpers/WindowHelpers.h
+++ b/src/core/helpers/WindowHelpers.h
@@ -177,6 +177,22 @@ inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps
{
return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
}
+
+/** Function to compute the output shape and window for the given inputs
+ *
+ * @param[in] infos Input tensor info objects
+ *
+ * @return A pair of the shape and window
+ */
+template <typename... Infos>
+std::pair<TensorShape, Window> compute_output_shape_and_window(const Infos &... infos)
+{
+ const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(infos...);
+ const TensorShape &out_shape = broadcast_pair.first;
+ const ValidRegion &valid_region = broadcast_pair.second;
+
+ return std::make_pair(out_shape, calculate_max_window(valid_region));
+}
#endif /* DOXYGEN_SKIP_THIS */
} // namespace arm_compute
diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp
index ccee8ffc21..a5fc0a2726 100644
--- a/src/runtime/NEON/INEOperator.cpp
+++ b/src/runtime/NEON/INEOperator.cpp
@@ -44,7 +44,12 @@ void INEOperator::run(ITensorPack &tensors)
ARM_COMPUTE_ERROR("No inputs provided");
}
- NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors);
+ run(tensors, _kernel->window());
+}
+
+void INEOperator::run(ITensorPack &tensors, const Window &window)
+{
+ NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, window, tensors);
}
void INEOperator::prepare(ITensorPack &constants)
diff --git a/src/runtime/cpu/operators/CpuElementwise.cpp b/src/runtime/cpu/operators/CpuElementwise.cpp
index 322bd09c43..b5c8dde925 100644
--- a/src/runtime/cpu/operators/CpuElementwise.cpp
+++ b/src/runtime/cpu/operators/CpuElementwise.cpp
@@ -23,95 +23,111 @@
*/
#include "src/runtime/cpu/operators/CpuElementwise.h"
#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
namespace cpu
{
-void CpuElementwiseMax::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseBase::run(ITensorPack &tensors)
+{
+ // If the kernel has been configured, use the window from the kernel.
+ if(_kernel->is_window_configured())
+ {
+ ICpuOperator::run(tensors);
+ return;
+ }
+
+ auto src0_info = tensors.get_const_tensor(TensorType::ACL_SRC_0)->info();
+ auto src1_info = tensors.get_const_tensor(TensorType::ACL_SRC_1)->info();
+ auto shape_and_window = compute_output_shape_and_window(*src0_info, *src1_info);
+ ICpuOperator::run(tensors, shape_and_window.second);
+}
+
+void CpuElementwiseMax::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
auto k = std::make_unique<kernels::CpuArithmeticKernel>();
- k->configure(ArithmeticOperation::MAX, input1, input2, output);
+ k->configure(ArithmeticOperation::MAX, src0, src1, dst);
_kernel = std::move(k);
}
-Status CpuElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuElementwiseMax::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MAX, input1, input2, output);
+ return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MAX, src0, src1, dst);
}
-void CpuElementwiseMin::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseMin::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
auto k = std::make_unique<kernels::CpuArithmeticKernel>();
- k->configure(ArithmeticOperation::MIN, input1, input2, output);
+ k->configure(ArithmeticOperation::MIN, src0, src1, dst);
_kernel = std::move(k);
}
-Status CpuElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuElementwiseMin::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MIN, input1, input2, output);
+ return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MIN, src0, src1, dst);
}
-void CpuElementwiseSquaredDiff::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseSquaredDiff::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
auto k = std::make_unique<kernels::CpuArithmeticKernel>();
- k->configure(ArithmeticOperation::SQUARED_DIFF, input1, input2, output);
+ k->configure(ArithmeticOperation::SQUARED_DIFF, src0, src1, dst);
_kernel = std::move(k);
}
-Status CpuElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuElementwiseSquaredDiff::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output);
+ return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src0, src1, dst);
}
-void CpuElementwiseDivision::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseDivision::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
auto k = std::make_unique<kernels::CpuDivisionKernel>();
- k->configure(input1, input2, output);
+ k->configure(src0, src1, dst);
_kernel = std::move(k);
}
-Status CpuElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuElementwiseDivision::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- return kernels::CpuDivisionKernel::validate(input1, input2, output);
+ return kernels::CpuDivisionKernel::validate(src0, src1, dst);
}
-void CpuElementwisePower::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwisePower::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
auto k = std::make_unique<kernels::CpuPowerKernel>();
- k->configure(input1, input2, output);
+ k->configure(src0, src1, dst);
_kernel = std::move(k);
}
-Status CpuElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuElementwisePower::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- return kernels::CpuPowerKernel::validate(input1, input2, output);
+ return kernels::CpuPowerKernel::validate(src0, src1, dst);
}
template <ComparisonOperation COP>
-void CpuElementwiseComparisonStatic<COP>::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseComparisonStatic<COP>::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
{
auto k = std::make_unique<kernels::CpuComparisonKernel>();
- k->configure(COP, input1, input2, output);
+ k->configure(COP, src0, src1, dst);
_kernel = std::move(k);
}
template <ComparisonOperation COP>
-Status CpuElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuElementwiseComparisonStatic<COP>::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
{
- return kernels::CpuComparisonKernel::validate(COP, input1, input2, output);
+ return kernels::CpuComparisonKernel::validate(COP, src0, src1, dst);
}
-void CpuElementwiseComparison::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op)
+void CpuElementwiseComparison::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ComparisonOperation op)
{
auto k = std::make_unique<kernels::CpuComparisonKernel>();
- k->configure(op, input1, input2, output);
+ k->configure(op, src0, src1, dst);
_kernel = std::move(k);
}
-Status CpuElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op)
+Status CpuElementwiseComparison::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ComparisonOperation op)
{
- return kernels::CpuComparisonKernel::validate(op, input1, input2, output);
+ return kernels::CpuComparisonKernel::validate(op, src0, src1, dst);
}
// Supported Specializations
diff --git a/src/runtime/cpu/operators/CpuElementwise.h b/src/runtime/cpu/operators/CpuElementwise.h
index 611a374c26..4b350d5f9f 100644
--- a/src/runtime/cpu/operators/CpuElementwise.h
+++ b/src/runtime/cpu/operators/CpuElementwise.h
@@ -30,30 +30,36 @@ namespace arm_compute
{
namespace cpu
{
+class CpuElementwiseBase : public ICpuOperator
+{
+public:
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+};
/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max
*
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a max operation between two tensors.
*/
-class CpuElementwiseMax : public ICpuOperator
+class CpuElementwiseMax : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs and dst.
*
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
};
/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min
@@ -61,25 +67,25 @@ public:
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a min operation between two tensors.
*/
-class CpuElementwiseMin : public ICpuOperator
+class CpuElementwiseMin : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs and dst.
*
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
};
/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference
@@ -87,25 +93,25 @@ public:
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2
*/
-class CpuElementwiseSquaredDiff : public ICpuOperator
+class CpuElementwiseSquaredDiff : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs and dst.
*
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
};
/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division
@@ -113,25 +119,25 @@ public:
* @note The tensor data type for the inputs must be S32/F16/F32.
* @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i])
*/
-class CpuElementwiseDivision : public ICpuOperator
+class CpuElementwiseDivision : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs and dst.
*
- * @param[in, out] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in, out] src0 First tensor input info. Data types supported: S32/F16/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division
*
- * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: S32/F16/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
};
/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power
@@ -140,25 +146,25 @@ public:
* @note The function performs a elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
* @note For an exponent that is a float, this function will only work with a positive base.
*/
-class CpuElementwisePower : public ICpuOperator
+class CpuElementwisePower : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs and dst.
*
- * @param[in, out] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in, out] src0 First tensor input info. Data types supported: F16/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p src0.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power
*
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] src0 First tensor input info. Data types supported: F16/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p src0.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
};
/** Basic function to run @ref cpu::kernels::CpuComparisonKernel.
@@ -166,27 +172,27 @@ public:
* @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @note The function performs a comparison operation between two tensors.
*/
-class CpuElementwiseComparison : public ICpuOperator
+class CpuElementwiseComparison : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs, dst and comparison operation.
*
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: U16/U32.
- * @param[in] op Comparison Operation to be performed.
+ * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: U16/U32.
+ * @param[in] op Comparison Operation to be performed.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ComparisonOperation op);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U16/U32.
- * @param[in] op Comparison Operation to be performed.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: U16/U32.
+ * @param[in] op Comparison Operation to be performed.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ComparisonOperation op);
};
/** Basic function to run @ref cpu::kernels::CpuComparisonKernel
@@ -195,25 +201,25 @@ public:
* @note The function performs a comparison operation between two tensors.
*/
template <ComparisonOperation op>
-class CpuElementwiseComparisonStatic : public ICpuOperator
+class CpuElementwiseComparisonStatic : public CpuElementwiseBase
{
public:
- /** Initialise the kernel's inputs, output and conversion policy.
+ /** Initialise the kernel's inputs and dst.
*
- * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: U16/U32.
+ * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[out] dst Output tensor info. Data types supported: U16/U32.
*/
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+ void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
/** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U16/U32.
+ * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
+ * @param[in] dst Output tensor info. Data types supported: U16/U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
};
/** Basic function to run equal comparison. */
diff --git a/src/runtime/cpu/operators/CpuElementwiseUnary.cpp b/src/runtime/cpu/operators/CpuElementwiseUnary.cpp
index d1b1700927..2140c5cf78 100644
--- a/src/runtime/cpu/operators/CpuElementwiseUnary.cpp
+++ b/src/runtime/cpu/operators/CpuElementwiseUnary.cpp
@@ -23,6 +23,7 @@
*/
#include "src/runtime/cpu/operators/CpuElementwiseUnary.h"
#include "src/core/cpu/kernels/CpuElementwiseUnaryKernel.h"
+#include "src/core/helpers/WindowHelpers.h"
namespace arm_compute
{
@@ -41,5 +42,17 @@ Status CpuElementwiseUnary::validate(ElementWiseUnary op, const ITensorInfo &src
{
return KernelType::validate(op, src, dst);
}
+
+void CpuElementwiseUnary::run(ITensorPack &tensors)
+{
+ if(_kernel->is_window_configured())
+ {
+ ICpuOperator::run(tensors);
+ return;
+ }
+
+ auto src_info = tensors.get_const_tensor(TensorType::ACL_SRC)->info();
+ ICpuOperator::run(tensors, compute_output_shape_and_window(*src_info).second);
+}
} // namespace cpu
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/cpu/operators/CpuElementwiseUnary.h b/src/runtime/cpu/operators/CpuElementwiseUnary.h
index 0b2a9e730d..721ba2a85b 100644
--- a/src/runtime/cpu/operators/CpuElementwiseUnary.h
+++ b/src/runtime/cpu/operators/CpuElementwiseUnary.h
@@ -50,6 +50,9 @@ public:
* @return a status
*/
static Status validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
};
} // namespace cpu