From c7b1e84ac5f3ada1b2f78c66979ef4d44804a955 Mon Sep 17 00:00:00 2001 From: SiCongLi Date: Mon, 22 Feb 2021 14:28:33 +0000 Subject: Remove usage of valid window region in NHWC CPU kernels - Part1 Replace all calculate_max_window(ValidRegion, ...) with calculate_max_window(TensorShape, ...) in CPU kernels Resolves COMPMID-4152 (1/2) Change-Id: I7403ea6b24b9e7889890839142a06439d6c8a499 Signed-off-by: SiCongLi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5202 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/NELogicalKernel.cpp | 5 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 20 ++------ src/core/NEON/kernels/NESelectKernel.cpp | 2 +- src/core/cpu/kernels/CpuAddKernel.cpp | 9 +--- src/core/cpu/kernels/CpuElementwiseKernel.cpp | 6 +-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 6 +-- src/core/cpu/kernels/CpuSubKernel.cpp | 9 +--- src/core/helpers/WindowHelpers.cpp | 53 +++++++++++++++++++++- src/core/helpers/WindowHelpers.h | 15 +++++- 9 files changed, 81 insertions(+), 44 deletions(-) (limited to 'src/core') diff --git a/src/core/NEON/kernels/NELogicalKernel.cpp b/src/core/NEON/kernels/NELogicalKernel.cpp index d98694ffe1..e1c24da777 100644 --- a/src/core/NEON/kernels/NELogicalKernel.cpp +++ b/src/core/NEON/kernels/NELogicalKernel.cpp @@ -287,9 +287,8 @@ void NELogicalKernel::configure(const ITensorInfo *input1, const ITensorInfo *in if(op != LogicalOperation::Not) { ARM_COMPUTE_ERROR_ON_NULLPTR(input2); - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - out_shape = broadcast_pair.first; - win = calculate_max_window(broadcast_pair.second, Steps()); + out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); + win = calculate_max_window(out_shape, Steps()); } ICPPKernel::configure(win); diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp 
b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index 6661326ea8..b287e18281 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -1486,9 +1486,7 @@ void NEPixelWiseMultiplicationKernel::configure(ITensorInfo *input1, ITensorInfo ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy)); - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); // Auto initialize output if not initialized set_shape_if_empty(*output, out_shape); @@ -1624,10 +1622,7 @@ void NEPixelWiseMultiplicationKernel::configure(ITensorInfo *input1, ITensorInfo } // Configure kernel window - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(valid_region); - Window win = calculate_max_window(valid_region, Steps()); + Window win = calculate_max_window(out_shape); INEKernel::configure(win); } @@ -1692,19 +1687,14 @@ void NEComplexPixelWiseMultiplicationKernel::configure(ITensorInfo *input1, ITen ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(input1, input2, output)); - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); // Auto initialize output if not initialized const TensorInfo 
out_info(out_shape, input1->num_channels(), input1->data_type()); auto_init_if_empty(*output, out_info); // Configure kernel window - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(valid_region); - Window win = calculate_max_window(valid_region, Steps()); + Window win = calculate_max_window(out_shape); INEKernel::configure(win); } diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp index 1d5f2b61a1..22cd442889 100644 --- a/src/core/NEON/kernels/NESelectKernel.cpp +++ b/src/core/NEON/kernels/NESelectKernel.cpp @@ -224,7 +224,7 @@ void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor _function = it->second; } - Window win = calculate_max_window(x->info()->valid_region()); + Window win = calculate_max_window(*x->info()); INEKernel::configure(win); } diff --git a/src/core/cpu/kernels/CpuAddKernel.cpp b/src/core/cpu/kernels/CpuAddKernel.cpp index 31c7b2af60..fc88a7e22d 100644 --- a/src/core/cpu/kernels/CpuAddKernel.cpp +++ b/src/core/cpu/kernels/CpuAddKernel.cpp @@ -249,9 +249,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons std::pair<Status, Window> validate_and_configure_window(const ITensorInfo &src0, const ITensorInfo &src1, ITensorInfo &dst) { - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(src0, src1); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape()); // Auto initialize dst if not initialized { @@ -287,12 +285,9 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo &src0, } } - Window win = calculate_max_window(valid_region, Steps()); + Window win = calculate_max_window(out_shape, Steps()); // CpuAddKernel doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - 
coord.set_num_dimensions(dst.num_dimensions()); - dst.set_valid_region(valid_region); return std::make_pair(Status{}, win); } } // namespace diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp index ab915b9d72..1ac21acbc0 100644 --- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp @@ -182,14 +182,12 @@ void CpuElementwiseKernel::configure_common(const ITensorInfo *input1, const ITe ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); // Configure kernel window - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); // Auto initialize output if not initialized auto_init_if_empty(*output, out_shape, 1, input1->data_type()); - Window win = calculate_max_window(valid_region); + Window win = calculate_max_window(out_shape); ICpuKernel::configure(win); } diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp index d2681bb060..2b5c11f8e1 100644 --- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -113,14 +113,12 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo ARM_COMPUTE_ERROR_THROW_ON(validate(op, input, output)); // Configure kernel window - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(input.tensor_shape()); // Auto initialize output if not initialized auto_init_if_empty(output, out_shape, 1, input.data_type()); - Window win = 
calculate_max_window(valid_region); + Window win = calculate_max_window(out_shape); _op = op; diff --git a/src/core/cpu/kernels/CpuSubKernel.cpp b/src/core/cpu/kernels/CpuSubKernel.cpp index a03dcf2353..d7057bbe2b 100644 --- a/src/core/cpu/kernels/CpuSubKernel.cpp +++ b/src/core/cpu/kernels/CpuSubKernel.cpp @@ -201,9 +201,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy)); - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src0, *src1); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(src0->tensor_shape(), src1->tensor_shape()); // Auto initialize dst if not initialized set_shape_if_empty(*dst, out_shape); @@ -211,10 +209,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I _policy = policy; // CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(dst->num_dimensions()); - dst->set_valid_region(valid_region); - Window win = calculate_max_window(valid_region, Steps()); + Window win = calculate_max_window(out_shape, Steps()); ICpuKernel::configure(win); } diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp index ba10eb9775..75ffb71b4b 100644 --- a/src/core/helpers/WindowHelpers.cpp +++ b/src/core/helpers/WindowHelpers.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -79,6 +79,57 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, return window; } +Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if(!skip_border) + { + border_size = BorderSize(0); + } + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), + steps[0])); + + size_t n = 1; + + if(shape.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + border_size.top, + // Skip the border below the image + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]), + steps[1])); + + ++n; + } + + if(shape.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2])); + + ++n; + } + + for(; n < shape.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n]))); + } + + for(; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size) { const Coordinates &anchor = valid_region.anchor; diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h index 9bc2135b6d..9216c33f16 100644 --- a/src/core/helpers/WindowHelpers.h +++ b/src/core/helpers/WindowHelpers.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -105,6 +105,17 @@ ValidRegion intersect_valid_regions(const Ts &... 
regions) */ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] shape Shape of the tensor space + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const TensorShape &shape, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + /** Calculate the maximum window for a given tensor shape and border setting * * @param[in] info Tensor info object defining the shape of the object for which the window is created. @@ -116,7 +127,7 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps */ inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) { - return calculate_max_window(info.valid_region(), steps, skip_border, border_size); + return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size); } /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting -- cgit v1.2.1