From 668ccdcfb81bfab3a2d44cd1ddd956e83a2dfb09 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Wed, 3 Feb 2021 10:32:59 +0000 Subject: Add dynamic tensor support to CpuElementwise The kernels and operators for binary and unary operations are now capable of being configured with dynamic shapes and computing windows at run-time. Additionally, changing arguments' names is done for consistency. Partially Implements: COMPMID-4127 Change-Id: I48e5038692db667dec7cb2b2906fe5683214fe19 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4973 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins --- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 40 ++++++++++------------ 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp') diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp index 2b5c11f8e1..ff2d080c95 100644 --- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -108,28 +108,28 @@ CpuElementwiseUnaryKernel::CpuElementwiseUnaryKernel() { } -void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output) +void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst) { - ARM_COMPUTE_ERROR_THROW_ON(validate(op, input, output)); - - // Configure kernel window - const TensorShape &out_shape = TensorShape::broadcast_shape(input.tensor_shape()); - - // Auto initialize output if not initialized - auto_init_if_empty(output, out_shape, 1, input.data_type()); - - Window win = calculate_max_window(out_shape); + ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst)); _op = op; - ICpuKernel::configure(win); + // If input shape is dynamic, expect a configured window and dst at run-time. + if(src.is_dynamic()) + { + return; + } + + auto shape_and_window = compute_output_shape_and_window(src); + auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type()); + ICpuKernel::configure(shape_and_window.second); } -Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output) +Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst) { - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src); - const auto *uk = get_implementation(input.data_type()); + const auto *uk = get_implementation(src.data_type()); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); switch(op) @@ -139,19 +139,19 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf case ElementWiseUnary::LOG: case ElementWiseUnary::ROUND: case ElementWiseUnary::SIN: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32); break; case ElementWiseUnary::NEG: case ElementWiseUnary::ABS: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32); break; default: ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported"); } - // Validate in case of configured output - if(output.total_size() > 0) + // Validate in case of configured dst + if(dst.total_size() > 0) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst); } return Status{}; @@ -160,8 +160,6 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); auto src = tensors.get_const_tensor(TensorType::ACL_SRC); auto dst = tensors.get_tensor(TensorType::ACL_DST); -- cgit v1.2.1